Dissertation Appendix C
Chapter 2: Regression based random forest modeling of inland pacific northwest (iPNW) drought-related wheat insurance loss, using time lagged climate correlation matrix association
Appendix C documents Chapter 2 analyses related to agricultural commodity loss, at a county level, from 2001-2015, across the 24 county region of the Inland Pacific Northwest (iPNW). In this analysis, we use a time lagged matrix approach to examine a range of combinations of climate and insurance loss for wheat due to drought.
Our analysis can be described in 5 specific steps:
Step 1. We subset insurance loss claims by wheat due to drought
Step 2. We aggregate climate data by month and county for the 24 county study area
Step 3. We construct a design matrix of all monthly climate combinations, per county and per climate variable.
Step 4. We select the monthly combination that has the highest correlation with wheat/drought claims, per county, for each year (2001 to 2015). Based on the output, we assemble an optimized climate/insurance loss dataset for all counties.
Step 5. We examine conditional relationships of climate to insurance loss using regression-based random forest analysis
In step 1, we perform a broad examination of wheat insurance loss due to drought for our 24 county iPNW study area, by calculating the ratio of wheat/drought acreage claims vs. all other wheat related damage cause claims, and comparing that data to individual climate variables. Example: For Whitman County, WA - we calculated the total amount of insurance claim acreage for wheat due to drought (for 2001 to 2015 combined), and divided that by the total amount of wheat insurance acreage for all other damage causes (excessive moisture, hail, frost, freeze, etc). We did this for each of the 24 counties.
We then calculated a summary value for each climate variable (plus aridity - which is PR / PET), for each county, from 2001 to 2015. We then constructed three scatterplots for comparison. Each point represents a county. For precipitation and potential evapotranspiration, we use the average annual total.
ipnw_climate_county_comparison <- function(var_commodity, var_damage) {
  # Compare 2001-2015 county-level crop insurance loss for one commodity
  # and one damage cause against summary climate for the 24-county iPNW
  # study area, then draw three scatterplots (precipitation, PET, and the
  # PR/PET aridity index vs. percent claim acreage; one point per county)
  # arranged side by side with grid.arrange().
  #
  # Args:
  #   var_commodity: RMA commodity name, e.g. "WHEAT".
  #   var_damage:    RMA damage-cause name, e.g. "Drought".
  #
  # Side effects: downloads and unzips data under /tmp, sets base-graphics
  # par()/layout(), and draws to the current graphics device.
  library(plotrix)
  library(ggplot2)
  library(gridExtra)
  library(RCurl)

  years <- 2001:2015

  #---- load climate data -------------------------------------------------
  URL <- "https://raw.githubusercontent.com/erichseamon/seamon_dissertation/master/data/climatology/climatology.zip"
  destfile <- "/tmp/climatology.zip"
  download.file(URL, destfile)
  outDir <- "/tmp/climatology/"
  unzip(destfile, exdir = outDir)

  # Read the 15 yearly climate summaries for one state, sum the monthly
  # rows within each county-year, then average across years per county.
  read_state_climatology <- function(state_name) {
    files <- file.path("/tmp/climatology",
                       paste0(state_name, "_", years, "_ipnw_summary"))
    yearly <- lapply(files, read.csv,
                     header = TRUE, strip.white = TRUE, sep = ",")
    combined <- do.call(rbind, yearly)
    by_year <- aggregate(. ~ countyfips + year, combined, sum)
    aggregate(. ~ countyfips, by_year, mean)
  }
  ID1 <- read_state_climatology("Idaho")
  WA1 <- read_state_climatology("Washington")
  OR1 <- read_state_climatology("Oregon")

  # County FIPS lookup used to attach county/state names to the climate rows.
  countiesfips <- read.csv(
    text = getURL("https://raw.githubusercontent.com/erichseamon/seamon_dissertation/master/data/counties/counties_fips.csv"),
    header = TRUE, strip.white = TRUE, sep = ",")
  colnames(countiesfips) <- c("countyfips", "county", "state")
  OR2 <- merge(countiesfips, OR1, by = "countyfips")
  ID2 <- merge(countiesfips, ID1, by = "countyfips")
  WA2 <- merge(countiesfips, WA1, by = "countyfips")

  #------ add crop insurance data ----------------------------------------
  URL <- "https://raw.githubusercontent.com/erichseamon/seamon_dissertation/master/data/RMA_originaldata/RMA_originaldata_txt.zip"
  destfile <- "/tmp/RMA_originaldata_txt.zip"
  download.file(URL, destfile)
  outDir <- "/tmp/RMA_originaldata/"
  unzip(destfile, exdir = outDir)

  # One pipe-delimited RMA claims file per year, with no header row.
  read_rma_year <- function(y) {
    read.csv(file.path("/tmp/RMA_originaldata", paste0(y, ".txt")),
             sep = "|", header = FALSE, strip.white = TRUE)
  }

  # 1989-2000 claims (these files have no acreage column).  Retained from
  # the original analysis; only the RMA_PNW_1989 subset below derives
  # from them and it is not used further in this function.
  RMA_1989 <- do.call(rbind, lapply(1989:2000, read_rma_year))
  RMA_1989 <- RMA_1989[, c(1, 3, 5, 7, 12, 13, 14, 15)]
  colnames(RMA_1989) <- c("year", "state", "county", "commodity",
                          "damagecause", "monthcode", "month", "loss")

  # 2001-2015 claims (these include the acreage column).
  RMA <- do.call(rbind, lapply(years, read_rma_year))
  RMA <- RMA[, c(1, 3, 5, 7, 12, 13, 14, 15, 16)]
  colnames(RMA) <- c("year", "state", "county", "commodity",
                     "damagecause", "monthcode", "month", "acres", "loss")

  # Restrict both claim sets to the three Pacific Northwest states.
  RMA_PNW <- subset(RMA, state %in% c("WA", "OR", "ID"))
  RMA_PNW_1989 <- subset(RMA_1989, state %in% c("WA", "OR", "ID"))

  # Normalize the text keys used for subsetting and merging.
  RMA_PNW$commodity <- trimws(RMA_PNW$commodity)
  RMA_PNW$county <- trimws(as.character(RMA_PNW$county))
  RMA_PNW$damagecause <- trimws(as.character(RMA_PNW$damagecause))

  # Per-state summary: total claim acreage for the commodity across ALL
  # damage causes, plus loss / acreage / loss-per-acre for the single
  # requested damage cause, merged with that state's climate summaries.
  state_loss_climate <- function(state_abbr, climate_df) {
    rma_state <- subset(RMA_PNW, state == state_abbr)
    loss_commodity <- subset(rma_state, commodity == var_commodity)
    loss_all <- aggregate(loss_commodity$acres,
                          by = list(loss_commodity$county,
                                    loss_commodity$state),
                          FUN = "sum")
    colnames(loss_all) <- c("county", "state", "all_damagecause_acres")
    loss1 <- subset(rma_state,
                    commodity == var_commodity & damagecause == var_damage)
    loss2 <- aggregate(loss1$loss,
                       by = list(loss1$county, loss1$state), FUN = "sum")
    loss2_acres <- aggregate(loss1$acres,
                             by = list(loss1$county, loss1$state),
                             FUN = "sum")
    colnames(loss2) <- c("county", "state", "loss")
    colnames(loss2_acres) <- c("county", "state", "acres")
    loss2$acres <- loss2_acres$acres
    loss2$lossperacre <- loss2$loss / loss2$acres
    loss_climate <- merge(loss2, climate_df, by = c("county", "state"))
    merge(loss_all, loss_climate, by = c("county", "state"))
  }
  ID_loss_climate_2 <- state_loss_climate("ID", ID2)
  WA_loss_climate_2 <- state_loss_climate("WA", WA2)
  OR_loss_climate_2 <- state_loss_climate("OR", OR2)

  #- merge all three states
  iPNW_loss_climate <- rbind(OR_loss_climate_2, ID_loss_climate_2,
                             WA_loss_climate_2)

  #-- subset to the 24 iPNW counties.  Franklin is pulled out separately
  #-- and restricted to WA (other states also have a Franklin county).
  Franklin <- subset(iPNW_loss_climate, county == "Franklin" & state == "WA")
  ipnw_counties <- c("Benewah", "Latah", "Nez Perce", "Lewis", "Idaho",
                     "Wasco", "Sherman", "Gilliam", "Morrow", "Umatilla",
                     "Union", "Wallowa", "Douglas", "Walla Walla", "Benton",
                     "Columbia", "Asotin", "Garfield", "Grant", "Whitman",
                     "Spokane", "Lincoln", "Adams")
  iPNW_loss_climate <- subset(iPNW_loss_climate, county %in% ipnw_counties)
  iPNW_loss_climate <- rbind(iPNW_loss_climate, Franklin)

  # Fraction of the commodity's claim acreage attributed to this damage cause.
  iPNW_loss_climate$pct_acreage <-
    iPNW_loss_climate$acres / iPNW_loss_climate$all_damagecause_acres

  # Base-graphics layout settings retained from the original script; the
  # panels below are ggplot objects arranged by grid.arrange() instead.
  par(mfrow = c(1, 3), mar = c(11.8, 6.1, 4, 2.1), family = "serif",
      mgp = c(4, 1, 0), las = 0)
  layout(matrix(c(1, 2, 3), 1, 3, byrow = TRUE))

  # pr: monthly means are scaled by 12 to approximate annual totals.
  iPNW_loss_climate_pr <- iPNW_loss_climate[order(-iPNW_loss_climate$pr), ]
  iPNW_loss_climate_pr$pr_year <- iPNW_loss_climate_pr$pr * 12
  pr <- ggplot(iPNW_loss_climate_pr, aes(x = pr_year, y = pct_acreage)) +
    geom_point() +
    geom_smooth(method = "loess") +
    xlab("Annual Precipitation (mm)") +
    ylab("Percentage Drought Wheat Acreage Loss") +
    theme(text = element_text(size = 16, family = "serif"))

  # pet: same annualization as precipitation.
  iPNW_loss_climate_pet <- iPNW_loss_climate[order(iPNW_loss_climate$pet), ]
  iPNW_loss_climate_pet$pet_year <- iPNW_loss_climate_pet$pet * 12
  pet <- ggplot(iPNW_loss_climate_pet, aes(x = pet_year, y = pct_acreage)) +
    geom_point() +
    geom_smooth() +
    xlab("Annual PET (mm)") +
    ylab("Percentage Drought Wheat Acreage Loss") +
    theme(text = element_text(size = 16, family = "serif"))

  # pr/pet aridity index (the 12x annualization cancels in the ratio).
  iPNW_loss_climate_prpet <- iPNW_loss_climate
  iPNW_loss_climate_prpet$prpet <-
    (iPNW_loss_climate_prpet$pr * 12) / (iPNW_loss_climate_prpet$pet * 12)
  prpet <- ggplot(iPNW_loss_climate_prpet, aes(x = prpet, y = pct_acreage)) +
    geom_point() +
    geom_smooth() +
    xlab("Aridity Index") +
    ylab("Percentage Drought Wheat Acreage Loss") +
    theme(text = element_text(size = 16, family = "serif"))

  grid.arrange(pr, pet, prpet, nrow = 1)
}
# Run the full county-level comparison for wheat losses attributed to drought.
ipnw_climate_county_comparison("WHEAT", "Drought")
For the three examined climate variables (potential evapotranspiration, precipitation, maximum temperature), a design matrix was developed for each of the 24 counties that are part of the defined iPNW study area. For each county, an absolute correlation was calculated for each monthly combination for each climate variable to the total wheat insurance loss due to drought. The result is a design matrix map that identifies the monthly combination with the highest correlation. For example, for Whitman County, WA, for maximum temperature - the monthly combination with the highest correlation between max temperature and wheat insurance loss due to drought was May/June/July (denoted as July 2).
# Design-matrix parameters for the worked example (Whitman County, WA):
# correlate monthly maximum-temperature (tmmx) combinations against the
# cube root of wheat/drought insurance loss.
state_var <- "WA"
county_var <- "Whitman"
commodity_var <- "WHEAT"
damage_var <- "Drought"
climate_var <- "tmmx"
predictor_var <- "cube_root_loss"
library(gplots)
library(plyr)
library(dplyr)
#library(plotly)
#packageVersion('plotly')
#response may be: loss_month, loss_per_acres, loss_year, total_acres_harvested, and total_acres_loss
#monthlist is jan-dec, each repeated 12 times
# Build the 144 month/lag labels ("jan1" ... "dec12") that index the
# 12 x 12 design matrix of monthly climate combinations, and prepare an
# empty 144-slot holder for the loss/climate correlations.
month_names <- rep(tolower(month.abb), each = 12)
lag_codes <- rep(1:12, times = 12)
monthnumlist <- data.frame(month = month_names, monthcode = lag_codes)
# Fuse name and lag into a single label, e.g. "jan1".
monthnumlist$combined <- paste0(monthnumlist$month, monthnumlist$monthcode)
climmonth <- monthnumlist$combined
# Placeholder 12 x 12 matrix and the 144-row correlation accumulator.
designmatrix <- matrix(NA, nrow = 12, ncol = 12)
dmvector <- as.data.frame(rep(NA, times = 144))
# Loop counter for filling dmvector row by row.
cl <- 0
# Fetch and unpack the two archives the correlation loop below reads:
# per-combination climate matrices, and per-county loss summaries.
URL <- "https://raw.githubusercontent.com/erichseamon/seamon_dissertation/master/data/climate_matrices/climate_matrices.zip"
destfile <- "/tmp/climate_matrices.zip"
download.file(URL, destfile)
outDir<-"/tmp/climate_matrices"
unzip(destfile,exdir=outDir)
URL <- "https://raw.githubusercontent.com/erichseamon/seamon_dissertation/master/data/climate_correlation_summaries/climate_correlations_summaries.zip"
destfile <- "/tmp/climate_correlations_summaries.zip"
download.file(URL, destfile)
outDir<-"/tmp/climate_correlations_summaries"
unzip(destfile,exdir=outDir)
# For each of the 144 month/lag combinations: read that combination's
# climate matrix, join it to the county's yearly loss summary, and record
# the climate-vs-loss correlation in dmvector.
for (ppp in climmonth) {
cl = cl +1
# NOTE(review): setwd() changes global state for the rest of the session.
setwd("/tmp/climate_matrices")
file1 <- read.csv(paste(state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", ppp, ".csv", sep=""))
# Climate variable of interest plus file1's second column — presumably
# the year column, since the join below is by "year" (TODO confirm).
climatevar <- as.data.frame(cbind((file1[climate_var]), file1[2]))
setwd("/tmp/climate_correlations_summaries")
file2 <- as.data.frame(read.csv(paste(state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", predictor_var, ".csv", sep="")))
# Restrict the loss summary to the target state/county.
file2 <- subset(file2, state == state_var)
file2 <- subset(file2, county == county_var)
colnames(file2) <- c("X", "year", "damagecause", "county", "state", "commodity", predictor_var )
# Standardize the climate series (z-score); the raw values in column 1
# are what actually feed the correlation below.
climatevar$zscore <- scale(climatevar[1], center = TRUE, scale = TRUE)
# NOTE(review): this assigns colnames on a copy and has no effect; the
# rename is redone on kor2 below.
colnames(climatevar[3]) <- "zscore"
# plyr::join — left join on year, keeping climatevar's row order.
kor <- join(climatevar, file2, by = "year")
# Drop years with no matching loss record.
kor2 <- subset(kor, damagecause != "NA")
colnames(kor2)[3] <- "zscore"
# kor2[9] <- as.numeric(kor2$zscore)
# Correlation of the climate values (col 1) with the predictor (col 9).
kor3 <- cor(kor2[1], kor2[9])
#insert kor3 into designmatrix iteratively
dmvector[cl,] <- kor3
}
dmvector <- as.data.frame(dmvector)
colnames(dmvector) <- "correlations"
# Reshape the 144 correlations row-major into a 12 x 12 matrix, then flip
# it vertically so later months appear in the top rows of the heatmap.
dmvector2 <- matrix(dmvector$correlations, 12, 12, TRUE)
dmvector2 <- dmvector2[nrow(dmvector2):1, ]
# Keep the 9 rows corresponding to Jan-Sep window starts.
dmvector3 <- dmvector2[4:12, ]
# Blank the infeasible month/lag cells (windows that would extend past
# December), leaving a staircase of valid combinations.  This replaces
# the nine hand-unrolled assignments of the original:
# r = 1 masks row 9 cols 4:12, ..., r = 9 masks row 1 col 12.
for (r in 1:9) {
  dmvector3[10 - r, (r + 3):12] <- NA
}
# Flatten back to a 108-value column, bottom row (row 9) first —
# identical to c(dmvector3[9,], dmvector3[8,], ..., dmvector3[1,]).
dmvector <- data.frame(dmvector = c(t(dmvector3[9:1, ])))
# Pick the "strongest" correlation cell.  Direction depends on the climate
# variable: moisture-type variables (precipitation, relative humidity,
# fuel moisture, PDSI) are expected to relate negatively to drought loss,
# so take the minimum; temperature/demand variables (tmmn, tmmx, pet) are
# expected to relate positively, so take the maximum.  This replaces the
# original nine-level nested if/else chain.
min_cor_vars <- c("pr", "rmin", "rmax", "fm100", "fm1000", "pdsi")
max_cor_vars <- c("tmmn", "pet", "tmmx")
if (climate_var %in% min_cor_vars) {
  dmv <- which.min(dmvector[1:108, 1])
} else if (climate_var %in% max_cor_vars) {
  dmv <- which.max(dmvector[1:108, 1])
}
# NOTE(review): any other climate_var leaves dmv unset, exactly as the
# original nested chain did.
#dmv <- which.max(abs( dmvector[1:108,1]) )
# Wrap the winning index as a one-row data frame keyed by "row".
dmv <- as.data.frame(dmv)
colnames(dmv)[1] <- "row"
#dmvector1a <- max(dmvector$correlations)
#dmvector1b <- data.frame(which=dmvector, correlations=dmvector[dmvector1a, ])
# Map the winning flat index back to its month/lag label.
monthnumlist2 <- cbind(as.data.frame(c(1:144)), monthnumlist)
colnames(monthnumlist2)[1] <- "row"
monthnumlist3 <- subset(monthnumlist2, row == dmv$row)
#makeRects <- function(tfMat){
require(utils)
# Grid coordinates of the winning cell (used by the commented-out
# highlighting variant of the heatmap call below).
cAbove <- expand.grid(1:12, 1:9)[monthnumlist3$row,]
# Row labels for the 9 retained window-start months.
monthzzz <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep")
# NOTE(review): this palette is overwritten before use by the 32-color
# ramp defined further down.
my_palette <- colorRampPalette(c("red", "yellow", "green"))(n = 108)
# NOTE(review): hard-coded server path — fails on any other host, and
# setwd() changes global state; confirm this directory is still needed.
setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_pngs")
# png(filename=paste(state1, "_", county1, "_", damage1, "_", climate_variable12, "_designmatrix.png", sep=""))
lblcol <- c(9:1)
# Rounded copy for cell annotations (used by the commented-out variant).
dmvector3a <- round(dmvector3, digits = 2)
# NOTE(review): <<- writes to the global environment.
newone <<- as.data.frame(expand.grid(1:12, 1:9)[monthnumlist3$row,])
# Re-apply the infeasible-cell mask to the rounded copy.
dmvector3a[9,4:12] <- NA
dmvector3a[8,5:12] <- NA
dmvector3a[7,6:12] <- NA
dmvector3a[6,7:12] <- NA
dmvector3a[5,8:12] <- NA
dmvector3a[4,9:12] <- NA
dmvector3a[3,10:12] <- NA
dmvector3a[2,11:12] <- NA
dmvector3a[1,12:12] <- NA
my_palette <- colorRampPalette(c("lightyellow", "yellow", "orange", "darkorange", "red", "darkred"))(n = 32)
# Draw the design-matrix heatmap (gplots::heatmap.2), titled with the
# month/lag label of the maximum-correlation cell.
nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, cexCol = 2, cexRow = 2, colsep=1:ncol(dmvector3), rowsep=1:nrow(dmvector3), col=my_palette, symbreaks = FALSE, scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""))
# nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, col=topo.colors(16), scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, cellnote = dmvector3a, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""), add.expr=points(newone$Var1,newone$Var2, pch=15, cex = 5.5 , col=rgb(199/255, 0, 0, 0.8)))
# Second design-matrix run: same county, commodity, and damage cause, but
# for potential evapotranspiration (pet) instead of maximum temperature.
state_var <- "WA"
county_var <- "Whitman"
commodity_var <- "WHEAT"
damage_var <- "Drought"
climate_var <- "pet"
predictor_var <- "cube_root_loss"
library(gplots)
library(plyr)
library(dplyr)
#library(plotly)
#packageVersion('plotly')
#response may be: loss_month, loss_per_acres, loss_year, total_acres_harvested, and total_acres_loss
#monthlist is jan-dec, each repeated 12 times
monthlist <- as.data.frame(rep(tolower(month.abb), each = 12))
#numlist is 12 months, repeated 12 times
numlist <- as.data.frame(rep((1:12), times = 12))
#monthnumlist puts month names and 1-12 together, which results in a vector of 144 that is the matrix of 12 by 12
monthnumlist <- as.data.frame(cbind(monthlist, numlist))
#renaming columns
colnames(monthnumlist) <- c("month", "monthcode")
#put the monthlist all together in one vector
monthnumlist$combined <- paste(monthnumlist$month, monthnumlist$monthcode, sep="")
#rename the vector
climmonth <- monthnumlist$combined
designmatrix <- matrix(NA, ncol=12, nrow=12)
#create an empty 144 vector to fill up with correlations between loss and climate
dmvector <- as.data.frame(rep(NA, times=144))
cl = 0
for (ppp in climmonth) {
cl = cl +1
setwd("/tmp/climate_matrices")
file1 <- read.csv(paste(state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", ppp, ".csv", sep=""))
climatevar <- as.data.frame(cbind((file1[climate_var]), file1[2]))
setwd("/tmp/climate_correlations_summaries")
file2 <- as.data.frame(read.csv(paste(state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", predictor_var, ".csv", sep="")))
file2 <- subset(file2, state == state_var)
file2 <- subset(file2, county == county_var)
colnames(file2) <- c("X", "year", "damagecause", "county", "state", "commodity", predictor_var )
climatevar$zscore <- scale(climatevar[1], center = TRUE, scale = TRUE)
colnames(climatevar[3]) <- "zscore"
kor <- join(climatevar, file2, by = "year")
kor2 <- subset(kor, damagecause != "NA")
colnames(kor2)[3] <- "zscore"
# kor2[9] <- as.numeric(kor2$zscore)
kor3 <- cor(kor2[1], kor2[9])
#insert kor3 into designmatrix iteratively
dmvector[cl,] <- kor3
}
# Reshape the 144 correlations into a 12 x 12 matrix (filled by row), flip it
# vertically, and keep the 9 x 12 feasible region (Jan-Sep start months).
dmvector <- as.data.frame(dmvector)
colnames(dmvector) <- "correlations"
dmvector2 <- matrix(dmvector$correlations, 12, 12, TRUE)
dmvector2 <- dmvector2[nrow(dmvector2):1, ]
dmvector3 <- dmvector2[4:12, ]
# Blank the infeasible upper-right triangle: row r keeps columns 1:(12 - r).
# (Replaces nine hand-written NA assignments.)
for (r in 1:9) {
  dmvector3[r, (13 - r):12] <- NA
}
# Flatten rows 9..1 into a single 108-long vector, preserving the original
# concatenation order c(row 9, row 8, ..., row 1).
dmvector <- as.vector(t(dmvector3[9:1, ]))
dmvector <- data.frame(dmvector)
# Choose the "best" cell: moisture-type variables are expected to relate
# negatively to drought loss, so take the minimum correlation; temperature /
# evaporative-demand variables take the maximum.  An unlisted climate_var
# leaves dmv untouched, matching the original nested if/else chain.
minimize_vars <- c("pr", "rmin", "rmax", "fm100", "fm1000", "pdsi")
maximize_vars <- c("tmmn", "pet", "tmmx")
if (climate_var %in% minimize_vars) {
  dmv <- which.min(dmvector[1:108, 1])
} else if (climate_var %in% maximize_vars) {
  dmv <- which.max(dmvector[1:108, 1])
}
#dmv <- which.max(abs( dmvector[1:108,1]) )
dmv <- as.data.frame(dmv)
colnames(dmv)[1] <- "row"
# Map the winning position back to its month/lag label.
# NOTE(review): dmv indexes the 108-cell flattened matrix while monthnumlist
# has 144 rows -- confirm the label lookup is on the intended grid.
monthnumlist2 <- cbind(as.data.frame(c(1:144)), monthnumlist)
colnames(monthnumlist2)[1] <- "row"
monthnumlist3 <- subset(monthnumlist2, row == dmv$row)
#makeRects <- function(tfMat){
require(utils)
# Grid coordinates (col, row) of the best cell on the 12 x 9 plotting grid.
cAbove <- expand.grid(1:12, 1:9)[monthnumlist3$row,]
# Row labels: only Jan-Sep start months remain after trimming.
monthzzz <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep")
# NOTE(review): this palette is overwritten a few lines below; kept as-is.
my_palette <- colorRampPalette(c("red", "yellow", "green"))(n = 108)
setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_pngs")
# png(filename=paste(state1, "_", county1, "_", damage1, "_", climate_variable12, "_designmatrix.png", sep=""))
lblcol <- c(9:1)
# Rounded values for the commented-out labeled heatmap variant below.
dmvector3a <- round(dmvector3, digits = 2)
# Global assignment: best-cell coordinates for the commented-out highlight.
newone <<- as.data.frame(expand.grid(1:12, 1:9)[monthnumlist3$row,])
# Blank the infeasible triangle of the label matrix (row r keeps 1:(12 - r)).
dmvector3a[9,4:12] <- NA
dmvector3a[8,5:12] <- NA
dmvector3a[7,6:12] <- NA
dmvector3a[6,7:12] <- NA
dmvector3a[5,8:12] <- NA
dmvector3a[4,9:12] <- NA
dmvector3a[3,10:12] <- NA
dmvector3a[2,11:12] <- NA
dmvector3a[1,12:12] <- NA
# Palette actually used: light yellow (weak) to dark red (strong).
my_palette <- colorRampPalette(c("lightyellow", "yellow", "orange", "darkorange", "red", "darkred"))(n = 32)
# Static heatmap of the 9 x 12 lagged-correlation matrix; title reports the
# selected month/lag combination.
nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, cexCol = 2, cexRow = 2, colsep=1:ncol(dmvector3), rowsep=1:nrow(dmvector3), col=my_palette, symbreaks = FALSE, scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""))
# nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, col=colorRampPalette(c("lightyellow", "yellow", "orange", "darkorange", "red")), scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, cellnote = dmvector3a, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""), add.expr=points(newone$Var1,newone$Var2, pch=15, cex = 5.5 , col=rgb(199/255, 0, 0, 0.8)))
# ---- Design-matrix correlation run: pr (precipitation) vs cube-root
# ---- wheat/drought loss, Whitman County, WA.
state_var <- "WA"
county_var <- "Whitman"
commodity_var <- "WHEAT"
damage_var <- "Drought"
climate_var <- "pr"
predictor_var <- "cube_root_loss"
library(gplots)
library(plyr)
library(dplyr)
#library(plotly)
#packageVersion('plotly')
#response may be: loss_month, loss_per_acres, loss_year, total_acres_harvested, and total_acres_loss
#monthlist is jan-dec, each repeated 12 times
monthlist <- as.data.frame(rep(tolower(month.abb), each = 12))
#numlist is 12 months, repeated 12 times
numlist <- as.data.frame(rep((1:12), times = 12))
#monthnumlist puts month names and 1-12 together, which results in a vector of 144 that is the matrix of 12 by 12
monthnumlist <- as.data.frame(cbind(monthlist, numlist))
#renaming columns
colnames(monthnumlist) <- c("month", "monthcode")
#put the monthlist all together in one vector, e.g. "jan1" ... "dec12"
monthnumlist$combined <- paste(monthnumlist$month, monthnumlist$monthcode, sep="")
#rename the vector
climmonth <- monthnumlist$combined
# 12 x 12 placeholder; not filled below (dmvector is used instead)
designmatrix <- matrix(NA, ncol=12, nrow=12)
#create an empty 144 vector to fill up with correlations between loss and climate
dmvector <- as.data.frame(rep(NA, times=144))
cl = 0
# One iteration per month/lag combination: read the pre-built climate CSV
# and the loss summary, join on year, and store the correlation between the
# climate value and the loss response.
for (ppp in climmonth) {
cl = cl +1
# NOTE(review): setwd() inside a loop is fragile; building full paths with
# file.path() would avoid mutating global state.
setwd("/tmp/climate_matrices")
file1 <- read.csv(paste(state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", ppp, ".csv", sep=""))
# column 2 of file1 is presumably the year column -- TODO confirm
climatevar <- as.data.frame(cbind((file1[climate_var]), file1[2]))
setwd("/tmp/climate_correlations_summaries")
file2 <- as.data.frame(read.csv(paste(state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", predictor_var, ".csv", sep="")))
file2 <- subset(file2, state == state_var)
file2 <- subset(file2, county == county_var)
colnames(file2) <- c("X", "year", "damagecause", "county", "state", "commodity", predictor_var )
climatevar$zscore <- scale(climatevar[1], center = TRUE, scale = TRUE)
# NOTE(review): this appears to rename only a subset copy and have no lasting
# effect on climatevar's names (kor2 is renamed again below) -- verify
colnames(climatevar[3]) <- "zscore"
kor <- join(climatevar, file2, by = "year")
kor2 <- subset(kor, damagecause != "NA")
colnames(kor2)[3] <- "zscore"
# kor2[9] <- as.numeric(kor2$zscore)
# correlation of the raw climate value (column 1) with the response (column 9)
kor3 <- cor(kor2[1], kor2[9])
#insert kor3 into designmatrix iteratively
dmvector[cl,] <- kor3
}
# Reshape the 144 correlations into a 12 x 12 matrix (filled by row), flip it
# vertically, and keep the 9 x 12 feasible region (Jan-Sep start months).
dmvector <- as.data.frame(dmvector)
colnames(dmvector) <- "correlations"
dmvector2 <- matrix(dmvector$correlations, 12, 12, TRUE)
dmvector2 <- dmvector2[nrow(dmvector2):1, ]
dmvector3 <- dmvector2[4:12, ]
# Blank the infeasible upper-right triangle: row r keeps columns 1:(12 - r).
# (Replaces nine hand-written NA assignments.)
for (r in 1:9) {
  dmvector3[r, (13 - r):12] <- NA
}
# Flatten rows 9..1 into a single 108-long vector, preserving the original
# concatenation order c(row 9, row 8, ..., row 1).
dmvector <- as.vector(t(dmvector3[9:1, ]))
dmvector <- data.frame(dmvector)
# Choose the "best" cell: moisture-type variables are expected to relate
# negatively to drought loss, so take the minimum correlation; temperature /
# evaporative-demand variables take the maximum.  An unlisted climate_var
# leaves dmv untouched, matching the original nested if/else chain.
minimize_vars <- c("pr", "rmin", "rmax", "fm100", "fm1000", "pdsi")
maximize_vars <- c("tmmn", "pet", "tmmx")
if (climate_var %in% minimize_vars) {
  dmv <- which.min(dmvector[1:108, 1])
} else if (climate_var %in% maximize_vars) {
  dmv <- which.max(dmvector[1:108, 1])
}
#dmv <- which.max(abs( dmvector[1:108,1]) )
dmv <- as.data.frame(dmv)
colnames(dmv)[1] <- "row"
# Map the winning position back to its month/lag label.
# NOTE(review): dmv indexes the 108-cell flattened matrix while monthnumlist
# has 144 rows -- confirm the label lookup is on the intended grid.
monthnumlist2 <- cbind(as.data.frame(c(1:144)), monthnumlist)
colnames(monthnumlist2)[1] <- "row"
monthnumlist3 <- subset(monthnumlist2, row == dmv$row)
#makeRects <- function(tfMat){
require(utils)
# Grid coordinates (col, row) of the best cell on the 12 x 9 plotting grid.
cAbove <- expand.grid(1:12, 1:9)[monthnumlist3$row,]
# Row labels: only Jan-Sep start months remain after trimming.
monthzzz <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep")
# NOTE(review): this palette is overwritten a few lines below; kept as-is.
my_palette <- colorRampPalette(c("red", "yellow", "green"))(n = 108)
setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_pngs")
# png(filename=paste(state1, "_", county1, "_", damage1, "_", climate_variable12, "_designmatrix.png", sep=""))
lblcol <- c(9:1)
# Rounded values for the commented-out labeled heatmap variant below.
dmvector3a <- round(dmvector3, digits = 2)
# Global assignment: best-cell coordinates for the commented-out highlight.
newone <<- as.data.frame(expand.grid(1:12, 1:9)[monthnumlist3$row,])
# Blank the infeasible triangle of the label matrix (row r keeps 1:(12 - r)).
dmvector3a[9,4:12] <- NA
dmvector3a[8,5:12] <- NA
dmvector3a[7,6:12] <- NA
dmvector3a[6,7:12] <- NA
dmvector3a[5,8:12] <- NA
dmvector3a[4,9:12] <- NA
dmvector3a[3,10:12] <- NA
dmvector3a[2,11:12] <- NA
dmvector3a[1,12:12] <- NA
# Palette actually used: reversed ramp (dark red = strongest negative) for pr.
my_palette <- colorRampPalette(c("darkred", "red", "darkorange", "orange", "yellow", "lightyellow"))(n = 16)
# Static heatmap of the 9 x 12 lagged-correlation matrix; title reports the
# selected month/lag combination.
nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, cexCol = 2, cexRow = 2, colsep=1:ncol(dmvector3), rowsep=1:nrow(dmvector3), col=my_palette, scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""))
# nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, col=topo.colors(16), scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, cellnote = dmvector3a, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""), add.expr=points(newone$Var1,newone$Var2, pch=15, cex = 5.5 , col=rgb(199/255, 0, 0, 0.8)))
# ---- Design-matrix correlation run: pdsi (Palmer Drought Severity Index)
# ---- vs cube-root wheat/drought loss, Whitman County, WA.
state_var <- "WA"
county_var <- "Whitman"
commodity_var <- "WHEAT"
damage_var <- "Drought"
climate_var <- "pdsi"
predictor_var <- "cube_root_loss"
library(gplots)
library(plyr)
library(dplyr)
#library(plotly)
#packageVersion('plotly')
#response may be: loss_month, loss_per_acres, loss_year, total_acres_harvested, and total_acres_loss
#monthlist is jan-dec, each repeated 12 times
monthlist <- as.data.frame(rep(tolower(month.abb), each = 12))
#numlist is 12 months, repeated 12 times
numlist <- as.data.frame(rep((1:12), times = 12))
#monthnumlist puts month names and 1-12 together, which results in a vector of 144 that is the matrix of 12 by 12
monthnumlist <- as.data.frame(cbind(monthlist, numlist))
#renaming columns
colnames(monthnumlist) <- c("month", "monthcode")
#put the monthlist all together in one vector, e.g. "jan1" ... "dec12"
monthnumlist$combined <- paste(monthnumlist$month, monthnumlist$monthcode, sep="")
#rename the vector
climmonth <- monthnumlist$combined
# 12 x 12 placeholder; not filled below (dmvector is used instead)
designmatrix <- matrix(NA, ncol=12, nrow=12)
#create an empty 144 vector to fill up with correlations between loss and climate
dmvector <- as.data.frame(rep(NA, times=144))
cl = 0
# One iteration per month/lag combination: read the pre-built climate CSV
# and the loss summary, join on year, and store the correlation between the
# climate value and the loss response.
for (ppp in climmonth) {
cl = cl +1
# NOTE(review): setwd() inside a loop is fragile; building full paths with
# file.path() would avoid mutating global state.
setwd("/tmp/climate_matrices")
file1 <- read.csv(paste(state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", ppp, ".csv", sep=""))
# column 2 of file1 is presumably the year column -- TODO confirm
climatevar <- as.data.frame(cbind((file1[climate_var]), file1[2]))
setwd("/tmp/climate_correlations_summaries")
file2 <- as.data.frame(read.csv(paste(state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", predictor_var, ".csv", sep="")))
file2 <- subset(file2, state == state_var)
file2 <- subset(file2, county == county_var)
colnames(file2) <- c("X", "year", "damagecause", "county", "state", "commodity", predictor_var )
climatevar$zscore <- scale(climatevar[1], center = TRUE, scale = TRUE)
# NOTE(review): this appears to rename only a subset copy and have no lasting
# effect on climatevar's names (kor2 is renamed again below) -- verify
colnames(climatevar[3]) <- "zscore"
kor <- join(climatevar, file2, by = "year")
kor2 <- subset(kor, damagecause != "NA")
colnames(kor2)[3] <- "zscore"
# kor2[9] <- as.numeric(kor2$zscore)
# correlation of the raw climate value (column 1) with the response (column 9)
kor3 <- cor(kor2[1], kor2[9])
#insert kor3 into designmatrix iteratively
dmvector[cl,] <- kor3
}
# Reshape the 144 correlations into a 12 x 12 matrix (filled by row), flip it
# vertically, and keep the 9 x 12 feasible region (Jan-Sep start months).
dmvector <- as.data.frame(dmvector)
colnames(dmvector) <- "correlations"
dmvector2 <- matrix(dmvector$correlations, 12, 12, TRUE)
dmvector2 <- dmvector2[nrow(dmvector2):1, ]
dmvector3 <- dmvector2[4:12, ]
# Blank the infeasible upper-right triangle: row r keeps columns 1:(12 - r).
# (Replaces nine hand-written NA assignments.)
for (r in 1:9) {
  dmvector3[r, (13 - r):12] <- NA
}
# Flatten rows 9..1 into a single 108-long vector, preserving the original
# concatenation order c(row 9, row 8, ..., row 1).
dmvector <- as.vector(t(dmvector3[9:1, ]))
dmvector <- data.frame(dmvector)
# Choose the "best" cell: moisture-type variables are expected to relate
# negatively to drought loss, so take the minimum correlation; temperature /
# evaporative-demand variables take the maximum.  An unlisted climate_var
# leaves dmv untouched, matching the original nested if/else chain.
minimize_vars <- c("pr", "rmin", "rmax", "fm100", "fm1000", "pdsi")
maximize_vars <- c("tmmn", "pet", "tmmx")
if (climate_var %in% minimize_vars) {
  dmv <- which.min(dmvector[1:108, 1])
} else if (climate_var %in% maximize_vars) {
  dmv <- which.max(dmvector[1:108, 1])
}
#dmv <- which.max(abs( dmvector[1:108,1]) )
dmv <- as.data.frame(dmv)
colnames(dmv)[1] <- "row"
# Map the winning position back to its month/lag label.
# NOTE(review): dmv indexes the 108-cell flattened matrix while monthnumlist
# has 144 rows -- confirm the label lookup is on the intended grid.
monthnumlist2 <- cbind(as.data.frame(c(1:144)), monthnumlist)
colnames(monthnumlist2)[1] <- "row"
monthnumlist3 <- subset(monthnumlist2, row == dmv$row)
#makeRects <- function(tfMat){
require(utils)
# Grid coordinates (col, row) of the best cell on the 12 x 9 plotting grid.
cAbove <- expand.grid(1:12, 1:9)[monthnumlist3$row,]
# Row labels: only Jan-Sep start months remain after trimming.
monthzzz <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep")
# NOTE(review): this palette is overwritten a few lines below; kept as-is.
my_palette <- colorRampPalette(c("red", "yellow", "green"))(n = 108)
setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_pngs")
# png(filename=paste(state1, "_", county1, "_", damage1, "_", climate_variable12, "_designmatrix.png", sep=""))
lblcol <- c(9:1)
# Rounded values for the commented-out labeled heatmap variant below.
dmvector3a <- round(dmvector3, digits = 2)
# Global assignment: best-cell coordinates for the commented-out highlight.
newone <<- as.data.frame(expand.grid(1:12, 1:9)[monthnumlist3$row,])
# Blank the infeasible triangle of the label matrix (row r keeps 1:(12 - r)).
dmvector3a[9,4:12] <- NA
dmvector3a[8,5:12] <- NA
dmvector3a[7,6:12] <- NA
dmvector3a[6,7:12] <- NA
dmvector3a[5,8:12] <- NA
dmvector3a[4,9:12] <- NA
dmvector3a[3,10:12] <- NA
dmvector3a[2,11:12] <- NA
dmvector3a[1,12:12] <- NA
# Palette actually used: reversed ramp (dark red = strongest negative) for pdsi.
my_palette <- colorRampPalette(c("darkred", "red", "darkorange", "orange", "yellow", "lightyellow"))(n = 16)
# Static heatmap of the 9 x 12 lagged-correlation matrix; title reports the
# selected month/lag combination.
nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, cexCol = 2, cexRow = 2, colsep=1:ncol(dmvector3), rowsep=1:nrow(dmvector3), col=my_palette, scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""))
# nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, col=topo.colors(16), scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, cellnote = dmvector3a, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""), add.expr=points(newone$Var1,newone$Var2, pch=15, cex = 5.5 , col=rgb(199/255, 0, 0, 0.8)))
In step 3, we generate maps of our climate-lagged correlations for each climate variable. Each county is labeled with the optimum monthly period that has the highest correlation between that climate variable and wheat/drought insurance loss (e.g., July 2 = two months previous to July, i.e. May/June/July), along with the correlation value. Each value shown is an absolute correlation coefficient (R).
# ---- Step 3 mapping: download the pre-computed climate/loss correlation
# ---- matrices and map the best month/lag cell per county.
library(maptools)
library(RColorBrewer)
library(leaflet)
library(raster)
library(RCurl)
# Map parameters: climate variable, loss response, and the specific
# month/lag cell (jul with a 2-month lag) shown on the choropleth.
climate_var <- "pr"
predictor_var <- "cube_root_loss"
monthend <- "jul"
monthnumber <- 2
library(RColorBrewer)
library(dplyr)
# Fetch and unpack the archived correlation matrices (one CSV per
# county/climate/response combination).
URL <- "https://raw.githubusercontent.com/erichseamon/seamon_dissertation/master/data/climate_correlations/climate_correlations.zip"
destfile <- "/tmp/climate_correlations.zip"
download.file(URL, destfile)
outDir<-"/tmp/climate_correlations"
unzip(destfile,exdir=outDir)
setwd("/tmp/climate_correlations/")
# "loss" is stored on disk under the name "crop_commodity_loss"; every other
# predictor uses its own name in the filename.
if (predictor_var == "loss") {
  predictor <- "crop_commodity_loss"
} else {
  predictor <- predictor_var
}
# One correlation-matrix CSV per county; filenames encode
# state_county_commodity_damage_climate_<predictor parts>.
files <- list.files(pattern = predictor)
filey <- do.call(rbind, strsplit(files, '[_]'))
filey <- subset(filey, filey[, 5] == climate_var)
colnames(filey) <- c("state", "county", "commodity", "damage", "climate",
                     "crop1", "crop2", "response", "crop3")
filey <- as.data.frame(filey)
# Rebuild the filtered filenames and read each county's matrix.
data <- with(filey, paste(state, "_", county, "_", commodity, "_", damage, "_",
                          climate, "_", crop1, "_", crop2, "_", response, "_",
                          crop3, sep = ""))
tables <- lapply(data, read.csv, header = TRUE)
tables <- lapply(tables, function(x) { x["X"] <- NULL; x })  # drop index column
tables <- lapply(tables, function(x) dplyr::arrange(x, -row_number()))  # Jan first
# Blank the infeasible upper-right triangle of each 9 x 12 matrix: row r keeps
# only columns 1:(r + 2).  Generalized from the hard-coded 1:26 county loop to
# however many files matched.
for (i in seq_along(tables)) {
  for (r in 1:9) {
    tables[[i]][r, (r + 3):12] <- NA
  }
}
# Row index (start month) corresponding to the requested ending month.
monthly <- match(monthend, tolower(month.abb))
# Find each county's best-correlated month/lag cell.  pr, rmin and pdsi take
# the minimum correlation (expected negative relation to drought loss); the
# remaining variables take the maximum.  This collapses the original
# nine-branch nested if/else chain; an unlisted climate_var leaves
# bestcounty undefined, exactly as before.
# NOTE(review): the direction for rmax/fm100/fm1000 differs from the heatmap
# sections earlier in this file -- preserved as found.
minimize_vars <- c("pr", "rmin", "pdsi")
maximize_vars <- c("rmax", "tmmx", "tmin", "fm100", "fm1000", "pet")
if (climate_var %in% c(minimize_vars, maximize_vars)) {
  pick <- if (climate_var %in% minimize_vars) min else max
  # One row per county: best start month (row), end month (column), value.
  # Generalized from the hard-coded 26 rows to however many tables were read.
  bestcounty <- matrix(NA, nrow = length(tables), ncol = 3)
  for (i in seq_along(tables)) {
    best_val <- pick(tables[[i]], na.rm = TRUE)
    best_pos <- which(tables[[i]] == best_val, arr.ind = TRUE)
    bestcounty[i, 1] <- best_pos[1, 1]
    bestcounty[i, 2] <- best_pos[1, 2]
    bestcounty[i, 3] <- best_val
  }
}
# Convert best-cell row indices to month abbreviations.
# NOTE(review): assigning character values into column 1 coerces the whole
# bestcounty matrix to character; downstream code depends on the resulting
# factor handling (see the as.numeric(levels(...)) step later) -- confirm
# before restructuring.
bestcounty[,1] <- tolower(month.abb[bestcounty[,1]])
bestcounty2 <- cbind(data.frame(filey$county), bestcounty)
colnames(bestcounty2) <- c("NAME", "MONTH", "ENDMONTH", "CORRELATION")
#new
#!!!!!!fix-row by column, or number of months by ending month
# Extract the single requested cell (monthend row, monthnumber column) from
# every county's matrix.
table2 <- lapply(tables, function(x) x[monthly, as.numeric(monthnumber)])
table3 <- data.frame(matrix(unlist(table2), nrow=length(table2), byrow=T))
colnames(table3) <- "correlations"
#combined <- do.call(rbind , tables)
table4 <- cbind(filey, table3)
#if (predictor_var == "loss") {
#predictor_var <- "crop_commodity_loss"
#}
# Keep county/commodity/damage/climate plus the extracted correlation.
table5 <- table4[c(2:5,10)]
colnames(table5) <- c("NAME", "COMMODITY", "DAMAGE", "climate", "correlations")
#table5$STATE_NAME <- state.name[match(table5[,1],state.abb)]
# Read the three-state Palouse county polygons (WGS84) and drop two
# non-study counties.
counties <- readShapePoly('/tmp/threestate_palouse.shp',
                          proj4string = CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"))
projection <- CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
counties <- counties[counties$NAME != "Kootenai", ]
counties <- counties[counties$NAME != "Clearwater", ]
# Attach the extracted cell correlations, then the per-county best cells.
counties2 <- merge(counties, table5, by = "NAME")
counties3 <- merge(counties2, bestcounty2, by = "NAME")
counties3$MONTHCOMBO <- paste(counties3$MONTH, counties3$ENDMONTH, sep = "")
# CORRELATION arrives as a factor (bestcounty was coerced to character);
# convert via levels to recover the numeric values.
counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
# Choropleth palettes (pal for polygons, pal2 for the reversed legend).
# Direct column access replaces the original eval(parse(text = ...))
# construction of the same expression.
pal <- colorNumeric(colorRampPalette(c("red", "orange", "yellow", "lightyellow"))(11),
                    na.color = "#ffffff", domain = counties3$CORRELATION)
pal2 <- colorNumeric(colorRampPalette(c("lightyellow", "yellow", "orange", "red"))(11),
                     na.color = "#ffffff", domain = counties3$CORRELATION)
#--
# Legend label formatter for leaflet.
# With reverse_order = TRUE, returns a formatter that presents the legend
# cut points in descending order; otherwise defers to leaflet's labelFormat().
myLabelFormat <- function(..., reverse_order = FALSE) {
  if (!reverse_order) {
    return(labelFormat(...))
  }
  function(type = "numeric", cuts) {
    cuts <- sort(cuts, decreasing = TRUE)
  }
}
#colorss = colorRampPalette(brewer.pal(11,"Spectral"))
#finalcol <- colorss(len <- length(counties3$CORRELATION))
#finalcol2 <- topo.colors(length(counties3$CORRELATION))[order(order(counties3$CORRELATION))]
#cellselect <- paste(monthend, monthnumber, sep="")
#par(mfrow=c(1,4))
#layout(matrix(c(1,1,1,2), 1, 4, byrow = TRUE))
#par(mar = c(1,1,1,1) + 0.1)
#plot(counties3, col = finalcol2, xaxs="i", yaxs="i")
##text(coordinates(counties2), labels=round(counties2$correlations, 2), cex=1.5, col = "black")
#added from ID
#corre <- round(as.numeric(as.character(counties3$CORRELATION)), 2)
#text(coordinates(counties2), labels=paste(counties3$MONTHCOMBO, "\n", corre, sep=""), cex=1.5, col = "white", font = 2)
#--
# Bounding box of the merged county polygons, used to fit the leaflet view.
exte <- extent(counties3)
library(htmltools)
# CSS for an (optional, currently unused) centered map title control.
tag.map.title <- tags$style(HTML("
.leaflet-control.map-title {
transform: translate(-50%,20%);
position: fixed !important;
left: 50%;
text-align: center;
padding-left: 10px;
padding-right: 10px;
background: rgba(255,255,255,0.75);
font-weight: bold;
font-size: 24px;
}
"))
# Title control (not added to the map; the addControl call below is
# commented out).
title <- tags$div(
tag.map.title, HTML(paste("IPNW Correlation, Climate vs. ", predictor_var, " by County for ", climate_var, sep=""))
)
# Polygon centroids, used to place the per-county text labels.
lat_long <- coordinates(counties3)
#labels <- paste(counties3$MONTHCOMBO, as.character(round(counties3$CORRELATION, 2)), sep = "<br/>")
#counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
counties3a <- data.frame(counties3)
# One HTML label per county: rounded correlation over the month/lag combo.
labs <- lapply(seq(nrow(counties3a)), function(i) {
paste0(as.character(round(counties3a[i, "CORRELATION"],2)), '<br/>',
counties3a[i, "MONTHCOMBO"])
})
# Leaflet choropleth: county polygons colored by correlation, centroid
# labels, and a legend with values sorted in decreasing order.
map <- leaflet(data = counties3) %>%
addProviderTiles(providers$Hydda.Base) %>%
addProviderTiles(providers$Stamen.TonerLines) %>%
#addControl(title, position = "topleft", className="map-title") %>%
fitBounds(exte[1], exte[3], exte[2], exte[4]) %>% addPolygons(fillColor = ~pal(counties3$CORRELATION), fillOpacity = .8, weight = 2, stroke = TRUE, smoothFactor = 0.5, color = "black") %>%
addLabelOnlyMarkers(data = counties3, lng = lat_long[,1], lat = lat_long[,2], label = lapply(labs, HTML), labelOptions = labelOptions(noHide = TRUE, direction = 'middle', textOnly = TRUE, textsize = "14px", col = "white", style=list(
'font-family'= 'serif',
'font-style'= 'bold',
'font-size' = '14px'
))) %>%
addLegend(pal = pal2, values = counties3$CORRELATION, labels = c("1", "2"), opacity = .5, title = paste("Correlation", " Matrix", sep="<br>"),
position = "bottomright", labFormat = labelFormat(transform = function(x) sort(x, decreasing = TRUE)))
map
# ---- Second correlation map: pet vs cube-root loss, same jul / 2-month-lag
# ---- cell.  Reuses the correlation CSVs already unpacked to
# ---- /tmp/climate_correlations by the previous map section.
library(maptools)
library(RColorBrewer)
library(leaflet)
library(raster)
library(RCurl)
climate_var <- "pet"
predictor_var <- "cube_root_loss"
monthend <- "jul"
monthnumber <- 2
library(RColorBrewer)
library(dplyr)
setwd("/tmp/climate_correlations/")
# "loss" is stored on disk under the name "crop_commodity_loss"; every other
# predictor uses its own name in the filename.
if (predictor_var == "loss") {
  predictor <- "crop_commodity_loss"
} else {
  predictor <- predictor_var
}
# One correlation-matrix CSV per county; filenames encode
# state_county_commodity_damage_climate_<predictor parts>.
files <- list.files(pattern = predictor)
filey <- do.call(rbind, strsplit(files, '[_]'))
filey <- subset(filey, filey[, 5] == climate_var)
colnames(filey) <- c("state", "county", "commodity", "damage", "climate",
                     "crop1", "crop2", "response", "crop3")
filey <- as.data.frame(filey)
# Rebuild the filtered filenames and read each county's matrix.
data <- with(filey, paste(state, "_", county, "_", commodity, "_", damage, "_",
                          climate, "_", crop1, "_", crop2, "_", response, "_",
                          crop3, sep = ""))
tables <- lapply(data, read.csv, header = TRUE)
tables <- lapply(tables, function(x) { x["X"] <- NULL; x })  # drop index column
tables <- lapply(tables, function(x) dplyr::arrange(x, -row_number()))  # Jan first
# Blank the infeasible upper-right triangle of each 9 x 12 matrix: row r keeps
# only columns 1:(r + 2).  Generalized from the hard-coded 1:26 county loop to
# however many files matched.
for (i in seq_along(tables)) {
  for (r in 1:9) {
    tables[[i]][r, (r + 3):12] <- NA
  }
}
# Row index (start month) corresponding to the requested ending month.
monthly <- match(monthend, tolower(month.abb))
# Find each county's best-correlated month/lag cell.  pr, rmin and pdsi take
# the minimum correlation (expected negative relation to drought loss); the
# remaining variables take the maximum.  This collapses the original
# nine-branch nested if/else chain; an unlisted climate_var leaves
# bestcounty undefined, exactly as before.
# NOTE(review): the direction for rmax/fm100/fm1000 differs from the heatmap
# sections earlier in this file -- preserved as found.
minimize_vars <- c("pr", "rmin", "pdsi")
maximize_vars <- c("rmax", "tmmx", "tmin", "fm100", "fm1000", "pet")
if (climate_var %in% c(minimize_vars, maximize_vars)) {
  pick <- if (climate_var %in% minimize_vars) min else max
  # One row per county: best start month (row), end month (column), value.
  # Generalized from the hard-coded 26 rows to however many tables were read.
  bestcounty <- matrix(NA, nrow = length(tables), ncol = 3)
  for (i in seq_along(tables)) {
    best_val <- pick(tables[[i]], na.rm = TRUE)
    best_pos <- which(tables[[i]] == best_val, arr.ind = TRUE)
    bestcounty[i, 1] <- best_pos[1, 1]
    bestcounty[i, 2] <- best_pos[1, 2]
    bestcounty[i, 3] <- best_val
  }
}
# Convert best-cell row indices to month abbreviations.
# NOTE(review): assigning character values into column 1 coerces the whole
# bestcounty matrix to character; downstream code depends on the resulting
# factor handling (see the as.numeric(levels(...)) step later) -- confirm
# before restructuring.
bestcounty[,1] <- tolower(month.abb[bestcounty[,1]])
bestcounty2 <- cbind(data.frame(filey$county), bestcounty)
colnames(bestcounty2) <- c("NAME", "MONTH", "ENDMONTH", "CORRELATION")
#new
#!!!!!!fix-row by column, or number of months by ending month
# Extract the single requested cell (monthend row, monthnumber column) from
# every county's matrix.
table2 <- lapply(tables, function(x) x[monthly, as.numeric(monthnumber)])
table3 <- data.frame(matrix(unlist(table2), nrow=length(table2), byrow=T))
colnames(table3) <- "correlations"
#combined <- do.call(rbind , tables)
table4 <- cbind(filey, table3)
#if (predictor_var == "loss") {
#predictor_var <- "crop_commodity_loss"
#}
# Keep county/commodity/damage/climate plus the extracted correlation.
table5 <- table4[c(2:5,10)]
colnames(table5) <- c("NAME", "COMMODITY", "DAMAGE", "climate", "correlations")
#table5$STATE_NAME <- state.name[match(table5[,1],state.abb)]
# Read the three-state Palouse county polygons (WGS84) and drop two
# non-study counties.
counties <- readShapePoly('/tmp/threestate_palouse.shp',
                          proj4string = CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"))
projection <- CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
counties <- counties[counties$NAME != "Kootenai", ]
counties <- counties[counties$NAME != "Clearwater", ]
# Attach the extracted cell correlations, then the per-county best cells.
counties2 <- merge(counties, table5, by = "NAME")
counties3 <- merge(counties2, bestcounty2, by = "NAME")
counties3$MONTHCOMBO <- paste(counties3$MONTH, counties3$ENDMONTH, sep = "")
# CORRELATION arrives as a factor (bestcounty was coerced to character);
# convert via levels to recover the numeric values.
counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
# Choropleth palette.  Direct column access replaces the original
# eval(parse(text = ...)) construction of the same expression.
pal <- colorNumeric(colorRampPalette(c("lightyellow", "yellow", "orange", "red"))(11),
                    na.color = "#ffffff", domain = counties3$CORRELATION)
#--
#colorss = colorRampPalette(brewer.pal(11,"Spectral"))
#finalcol <- colorss(len <- length(counties3$CORRELATION))
#finalcol2 <- topo.colors(length(counties3$CORRELATION))[order(order(counties3$CORRELATION))]
#cellselect <- paste(monthend, monthnumber, sep="")
#par(mfrow=c(1,4))
#layout(matrix(c(1,1,1,2), 1, 4, byrow = TRUE))
#par(mar = c(1,1,1,1) + 0.1)
#plot(counties3, col = finalcol2, xaxs="i", yaxs="i")
##text(coordinates(counties2), labels=round(counties2$correlations, 2), cex=1.5, col = "black")
#added from ID
#corre <- round(as.numeric(as.character(counties3$CORRELATION)), 2)
#text(coordinates(counties2), labels=paste(counties3$MONTHCOMBO, "\n", corre, sep=""), cex=1.5, col = "white", font = 2)
#--
# Bounding box of the study-area polygons, used to fit the leaflet view.
exte <- extent(counties3)
library(htmltools)
# CSS for an (optional) centered map title; the addControl call that would
# attach it is commented out in the map pipeline below.
tag.map.title <- tags$style(HTML("
.leaflet-control.map-title {
transform: translate(-50%,20%);
position: fixed !important;
left: 50%;
text-align: center;
padding-left: 10px;
padding-right: 10px;
background: rgba(255,255,255,0.75);
font-weight: bold;
font-size: 24px;
}
"))
title <- tags$div(
tag.map.title, HTML(paste("IPNW Correlation, Climate vs. ", predictor_var, " by County for ", climate_var, sep=""))
)
# County centroids, used to place the text-only labels.
lat_long <- coordinates(counties3)
#labels <- paste(counties3$MONTHCOMBO, as.character(round(counties3$CORRELATION, 2)), sep = "<br/>")
#counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
# Plain data.frame copy of the attribute table (drops the spatial slots).
counties3a <- data.frame(counties3)
# Per-county HTML label: rounded correlation over the month-combo code.
labs <- lapply(seq(nrow(counties3a)), function(i) {
paste0(as.character(round(counties3a[i, "CORRELATION"],2)), '<br/>',
counties3a[i, "MONTHCOMBO"])
})
# Choropleth of the selected correlation cell per county, with centroid
# labels and a numeric legend colored by `pal`.
map <- leaflet(data = counties3) %>%
addProviderTiles(providers$Hydda.Base) %>%
addProviderTiles(providers$Stamen.TonerLines) %>%
#addControl(title, position = "topleft", className="map-title") %>%
fitBounds(exte[1], exte[3], exte[2], exte[4]) %>% addPolygons(fillColor = ~pal(counties3$CORRELATION), fillOpacity = .8, weight = 2, stroke = TRUE, smoothFactor = 0.5, color = "black") %>%
addLabelOnlyMarkers(data = counties3, lng = lat_long[,1], lat = lat_long[,2], label = lapply(labs, HTML), labelOptions = labelOptions(noHide = TRUE, direction = 'middle', textOnly = TRUE, textsize = "14px", col = "white", style=list(
'font-family'= 'serif',
'font-style'= 'bold',
'font-size' = '14px'
))) %>%
addLegend(pal = pal, values = counties3$CORRELATION, labels = c("1", "2"), opacity = .5, title = paste("Correlation", " Matrix", sep="<br>"),
position = "bottomright")
# Render the widget.
map
# Section setup: map the best tmmx (max temperature) correlation per county.
# Consolidated the library calls (RColorBrewer was attached twice) while
# preserving the original attach order, dplyr last.
library(maptools)
library(RColorBrewer)
library(leaflet)
library(raster)
library(RCurl)
library(dplyr)
# Run parameters: climate variable, response variable, and the month/lag
# cell of the correlation matrix to map.
climate_var <- "tmmx"
predictor_var <- "cube_root_loss"
monthend <- "jul"
monthnumber <- 2
# NOTE(review): setwd() in scripts is fragile; kept for compatibility.
setwd("/tmp/climate_correlations/")
# The "loss" response is stored under its full file-name token.
predictor <- if (predictor_var == "loss") "crop_commodity_loss" else predictor_var
# Locate the per-county correlation-matrix CSVs for the chosen response.
files <- list.files(pattern = predictor)
# File names are underscore-delimited tokens:
# state_county_commodity_damage_climate_crop1_crop2_response_crop3
filey <- do.call(rbind, strsplit(files, '[_]'))
filey <- subset(filey, filey[,5] == climate_var)
colnames(filey) <- c("state", "county", "commodity", "damage", "climate", "crop1", "crop2", "response", "crop3")
filey <- as.data.frame(filey)
# Rebuild the full file names and read each county's matrix.
data <- with(filey, paste(state, county, commodity, damage, climate, crop1, crop2, response, crop3, sep = "_"))
tables <- lapply(data, read.csv, header = TRUE)
tables <- lapply(tables, function(x) { x["X"] <- NULL; x }) #--remove first index row from each list
tables <- lapply(tables, function(x) dplyr::arrange(x, -row_number())) #--(flips matrix - puts jan as 1st row and sept as 9th row)
# Blank out impossible month/lag combinations in the upper triangle: row r
# keeps at most columns 1:(r+2). Generalized from a hand-unrolled loop that
# was hardcoded to 26 tables.
for (i in seq_along(tables)) {
  for (r in 1:9) {
    tables[[i]][r, (r + 3):12] <- NA
  }
}
# Row index of the requested ending month inside each county's matrix.
monthly <- match(monthend, tolower(month.abb))
# For each county, find the strongest correlation cell in its month/lag
# matrix. For climate variables where drought corresponds to LOW values
# (pr, rmin, pdsi) the optimum is the minimum (most negative correlation);
# for the remaining variables (rmax, tmmx, tmin, fm100, fm1000, pet) it is
# the maximum. This replaces a 9-deep nested if/else chain that repeated
# the identical loop body once per climate variable.
min_vars <- c("pr", "rmin", "pdsi")
max_vars <- c("rmax", "tmmx", "tmin", "fm100", "fm1000", "pet")
if (climate_var %in% c(min_vars, max_vars)) {
  pick <- if (climate_var %in% min_vars) min else max
  # Columns: 1 = row (month) index, 2 = column (lag) index, 3 = correlation.
  bestcounty <- matrix(NA, nrow = length(tables), ncol = 3)
  for (i in seq_along(tables)) {
    best_val <- pick(tables[[i]], na.rm = TRUE)
    best_pos <- which(tables[[i]] == best_val, arr.ind = TRUE)
    bestcounty[i, 1] <- best_pos[1, 1]
    bestcounty[i, 2] <- best_pos[1, 2]
    bestcounty[i, 3] <- best_val
  }
}
# Convert the best-cell row index (1-12) into a lowercase month abbreviation.
# NOTE: assigning a character into the matrix coerces the whole matrix to
# character mode, so the correlation column also becomes character (it is
# converted back to numeric further below).
bestcounty[,1] <- tolower(month.abb[bestcounty[,1]])
# Attach county names; remaining columns are best month, ending-month/lag
# index, and the correlation value.
bestcounty2 <- cbind(data.frame(filey$county), bestcounty)
colnames(bestcounty2) <- c("NAME", "MONTH", "ENDMONTH", "CORRELATION")
#new
# TODO(review): confirm matrix orientation -- rows appear to be months and
# columns the month-combination (lag) index, per the indexing just below.
# Pull the single requested cell (monthend row, monthnumber column) from each
# county's correlation matrix.
table2 <- lapply(tables, function(x) x[monthly, as.numeric(monthnumber)])
# One correlation per county -> single-column data frame.
table3 <- data.frame(matrix(unlist(table2), nrow=length(table2), byrow=T))
colnames(table3) <- "correlations"
#combined <- do.call(rbind , tables)
table4 <- cbind(filey, table3)
#if (predictor_var == "loss") {
#predictor_var <- "crop_commodity_loss"
#}
# Keep county, commodity, damage, climate tokens plus the correlation column.
# NOTE(review): positional selection -- fragile if filey's columns change.
table5 <- table4[c(2:5,10)]
colnames(table5) <- c("NAME", "COMMODITY", "DAMAGE", "climate", "correlations")
#table5$STATE_NAME <- state.name[match(table5[,1],state.abb)]
# Read the 3-state Palouse county shapefile (WGS84).
# NOTE(review): maptools::readShapePoly is deprecated upstream; retained here
# to avoid introducing new dependencies.
counties <- readShapePoly('/tmp/threestate_palouse.shp',
                          proj4string = CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"))
projection <- CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
# Drop the two counties outside the 24-county study area (single %in% filter
# instead of two chained != subsets).
counties <- counties[!counties$NAME %in% c("Kootenai", "Clearwater"), ]
counties2 <- merge(counties, table5, by = "NAME")
#--new
counties3 <- merge(counties2, bestcounty2, by = "NAME")
# Label such as "jun2": best starting month plus month-combination index.
counties3$MONTHCOMBO <- paste0(counties3$MONTH, counties3$ENDMONTH)
#--new
# CORRELATION arrives as a factor (it passed through a character matrix);
# convert via levels() to recover the numeric values.
counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
# Direct column reference instead of eval(parse(text = ...)).
pal <- colorNumeric(colorRampPalette(c("lightyellow", "yellow", "orange", "red"))(11),
                    na.color = "#ffffff", domain = counties3$CORRELATION)
#--
#colorss = colorRampPalette(brewer.pal(11,"Spectral"))
#finalcol <- colorss(len <- length(counties3$CORRELATION))
#finalcol2 <- topo.colors(length(counties3$CORRELATION))[order(order(counties3$CORRELATION))]
#cellselect <- paste(monthend, monthnumber, sep="")
#par(mfrow=c(1,4))
#layout(matrix(c(1,1,1,2), 1, 4, byrow = TRUE))
#par(mar = c(1,1,1,1) + 0.1)
#plot(counties3, col = finalcol2, xaxs="i", yaxs="i")
##text(coordinates(counties2), labels=round(counties2$correlations, 2), cex=1.5, col = "black")
#added from ID
#corre <- round(as.numeric(as.character(counties3$CORRELATION)), 2)
#text(coordinates(counties2), labels=paste(counties3$MONTHCOMBO, "\n", corre, sep=""), cex=1.5, col = "white", font = 2)
#--
# Bounding box of the study-area polygons, used to fit the leaflet view.
exte <- extent(counties3)
library(htmltools)
# CSS for an (optional) centered map title; the addControl call that would
# attach it is commented out in the map pipeline below.
tag.map.title <- tags$style(HTML("
.leaflet-control.map-title {
transform: translate(-50%,20%);
position: fixed !important;
left: 50%;
text-align: center;
padding-left: 10px;
padding-right: 10px;
background: rgba(255,255,255,0.75);
font-weight: bold;
font-size: 24px;
}
"))
title <- tags$div(
tag.map.title, HTML(paste("IPNW Correlation, Climate vs. ", predictor_var, " by County for ", climate_var, sep=""))
)
# County centroids, used to place the text-only labels.
lat_long <- coordinates(counties3)
#labels <- paste(counties3$MONTHCOMBO, as.character(round(counties3$CORRELATION, 2)), sep = "<br/>")
#counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
# Plain data.frame copy of the attribute table (drops the spatial slots).
counties3a <- data.frame(counties3)
# Per-county HTML label: rounded correlation over the month-combo code.
labs <- lapply(seq(nrow(counties3a)), function(i) {
paste0(as.character(round(counties3a[i, "CORRELATION"],2)), '<br/>',
counties3a[i, "MONTHCOMBO"])
})
# Choropleth of the selected correlation cell per county, with centroid
# labels and a numeric legend colored by `pal`.
map <- leaflet(data = counties3) %>%
addProviderTiles(providers$Hydda.Base) %>%
addProviderTiles(providers$Stamen.TonerLines) %>%
#addControl(title, position = "topleft", className="map-title") %>%
fitBounds(exte[1], exte[3], exte[2], exte[4]) %>% addPolygons(fillColor = ~pal(counties3$CORRELATION), fillOpacity = .8, weight = 2, stroke = TRUE, smoothFactor = 0.5, color = "black") %>%
addLabelOnlyMarkers(data = counties3, lng = lat_long[,1], lat = lat_long[,2], label = lapply(labs, HTML), labelOptions = labelOptions(noHide = TRUE, direction = 'middle', textOnly = TRUE, textsize = "14px", col = "white", style=list(
'font-family'= 'serif',
'font-style'= 'bold',
'font-size' = '14px'
))) %>%
addLegend(pal = pal, values = counties3$CORRELATION, labels = c("1", "2"), opacity = .5, title = paste("Correlation", " Matrix", sep="<br>"),
position = "bottomright")
# Render the widget.
map
# Section setup: map the best pdsi correlation per county.
# Consolidated the library calls (RColorBrewer was attached twice) while
# preserving the original attach order, dplyr last.
library(maptools)
library(RColorBrewer)
library(leaflet)
library(raster)
library(RCurl)
library(dplyr)
# Run parameters: climate variable, response variable, and the month/lag
# cell of the correlation matrix to map.
climate_var <- "pdsi"
predictor_var <- "cube_root_loss"
monthend <- "jul"
monthnumber <- 2
# NOTE(review): setwd() in scripts is fragile; kept for compatibility.
setwd("/tmp/climate_correlations/")
# The "loss" response is stored under its full file-name token.
predictor <- if (predictor_var == "loss") "crop_commodity_loss" else predictor_var
# Locate the per-county correlation-matrix CSVs for the chosen response.
files <- list.files(pattern = predictor)
# File names are underscore-delimited tokens:
# state_county_commodity_damage_climate_crop1_crop2_response_crop3
filey <- do.call(rbind, strsplit(files, '[_]'))
filey <- subset(filey, filey[,5] == climate_var)
colnames(filey) <- c("state", "county", "commodity", "damage", "climate", "crop1", "crop2", "response", "crop3")
filey <- as.data.frame(filey)
# Rebuild the full file names and read each county's matrix.
data <- with(filey, paste(state, county, commodity, damage, climate, crop1, crop2, response, crop3, sep = "_"))
tables <- lapply(data, read.csv, header = TRUE)
tables <- lapply(tables, function(x) { x["X"] <- NULL; x }) #--remove first index row from each list
tables <- lapply(tables, function(x) dplyr::arrange(x, -row_number())) #--(flips matrix - puts jan as 1st row and sept as 9th row)
# Blank out impossible month/lag combinations in the upper triangle: row r
# keeps at most columns 1:(r+2). Generalized from a hand-unrolled loop that
# was hardcoded to 26 tables.
for (i in seq_along(tables)) {
  for (r in 1:9) {
    tables[[i]][r, (r + 3):12] <- NA
  }
}
# Row index of the requested ending month inside each county's matrix.
monthly <- match(monthend, tolower(month.abb))
# For each county, find the strongest correlation cell in its month/lag
# matrix. For climate variables where drought corresponds to LOW values
# (pr, rmin, pdsi) the optimum is the minimum (most negative correlation);
# for the remaining variables (rmax, tmmx, tmin, fm100, fm1000, pet) it is
# the maximum. This replaces a 9-deep nested if/else chain that repeated
# the identical loop body once per climate variable.
min_vars <- c("pr", "rmin", "pdsi")
max_vars <- c("rmax", "tmmx", "tmin", "fm100", "fm1000", "pet")
if (climate_var %in% c(min_vars, max_vars)) {
  pick <- if (climate_var %in% min_vars) min else max
  # Columns: 1 = row (month) index, 2 = column (lag) index, 3 = correlation.
  bestcounty <- matrix(NA, nrow = length(tables), ncol = 3)
  for (i in seq_along(tables)) {
    best_val <- pick(tables[[i]], na.rm = TRUE)
    best_pos <- which(tables[[i]] == best_val, arr.ind = TRUE)
    bestcounty[i, 1] <- best_pos[1, 1]
    bestcounty[i, 2] <- best_pos[1, 2]
    bestcounty[i, 3] <- best_val
  }
}
# Convert the best-cell row index (1-12) into a lowercase month abbreviation.
# NOTE: assigning a character into the matrix coerces the whole matrix to
# character mode, so the correlation column also becomes character (it is
# converted back to numeric further below).
bestcounty[,1] <- tolower(month.abb[bestcounty[,1]])
# Attach county names; remaining columns are best month, ending-month/lag
# index, and the correlation value.
bestcounty2 <- cbind(data.frame(filey$county), bestcounty)
colnames(bestcounty2) <- c("NAME", "MONTH", "ENDMONTH", "CORRELATION")
#new
# TODO(review): confirm matrix orientation -- rows appear to be months and
# columns the month-combination (lag) index, per the indexing just below.
# Pull the single requested cell (monthend row, monthnumber column) from each
# county's correlation matrix.
table2 <- lapply(tables, function(x) x[monthly, as.numeric(monthnumber)])
# One correlation per county -> single-column data frame.
table3 <- data.frame(matrix(unlist(table2), nrow=length(table2), byrow=T))
colnames(table3) <- "correlations"
#combined <- do.call(rbind , tables)
table4 <- cbind(filey, table3)
#if (predictor_var == "loss") {
#predictor_var <- "crop_commodity_loss"
#}
# Keep county, commodity, damage, climate tokens plus the correlation column.
# NOTE(review): positional selection -- fragile if filey's columns change.
table5 <- table4[c(2:5,10)]
colnames(table5) <- c("NAME", "COMMODITY", "DAMAGE", "climate", "correlations")
#table5$STATE_NAME <- state.name[match(table5[,1],state.abb)]
# Read the 3-state Palouse county shapefile (WGS84).
# NOTE(review): maptools::readShapePoly is deprecated upstream; retained here
# to avoid introducing new dependencies.
counties <- readShapePoly('/tmp/threestate_palouse.shp',
                          proj4string = CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"))
projection <- CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
# Drop the two counties outside the 24-county study area (single %in% filter
# instead of two chained != subsets).
counties <- counties[!counties$NAME %in% c("Kootenai", "Clearwater"), ]
counties2 <- merge(counties, table5, by = "NAME")
#--new
counties3 <- merge(counties2, bestcounty2, by = "NAME")
# Label such as "jun2": best starting month plus month-combination index.
counties3$MONTHCOMBO <- paste0(counties3$MONTH, counties3$ENDMONTH)
#--new
# CORRELATION arrives as a factor (it passed through a character matrix);
# convert via levels() to recover the numeric values.
counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
# Two palettes over the same domain (direct column reference instead of
# eval(parse(text = ...))): "pal" ramps red -> lightyellow so the low end of
# the domain (most negative pdsi correlations) maps to red; "pal2" is the
# reversed ramp, used for the legend.
pal <- colorNumeric(colorRampPalette(c("red", "orange", "yellow", "lightyellow"))(11),
                    na.color = "#ffffff", domain = counties3$CORRELATION)
pal2 <- colorNumeric(colorRampPalette(c("lightyellow", "yellow", "orange", "red"))(11),
                     na.color = "#ffffff", domain = counties3$CORRELATION)
# Optional legend label formatter: with reverse_order = TRUE it returns a
# function that sorts the legend cuts in decreasing order; otherwise it
# defers to leaflet::labelFormat(...).
myLabelFormat <- function(..., reverse_order = FALSE) {
  if (reverse_order) {
    function(type = "numeric", cuts) {
      sort(cuts, decreasing = TRUE)
    }
  } else {
    labelFormat(...)
  }
}
#--
#colorss = colorRampPalette(brewer.pal(11,"Spectral"))
#finalcol <- colorss(len <- length(counties3$CORRELATION))
#finalcol2 <- topo.colors(length(counties3$CORRELATION))[order(order(counties3$CORRELATION))]
#cellselect <- paste(monthend, monthnumber, sep="")
#par(mfrow=c(1,4))
#layout(matrix(c(1,1,1,2), 1, 4, byrow = TRUE))
#par(mar = c(1,1,1,1) + 0.1)
#plot(counties3, col = finalcol2, xaxs="i", yaxs="i")
##text(coordinates(counties2), labels=round(counties2$correlations, 2), cex=1.5, col = "black")
#added from ID
#corre <- round(as.numeric(as.character(counties3$CORRELATION)), 2)
#text(coordinates(counties2), labels=paste(counties3$MONTHCOMBO, "\n", corre, sep=""), cex=1.5, col = "white", font = 2)
#--
# Bounding box of the study-area polygons, used to fit the leaflet view.
exte <- extent(counties3)
library(htmltools)
# CSS for an (optional) centered map title; the addControl call that would
# attach it is commented out in the map pipeline below.
tag.map.title <- tags$style(HTML("
.leaflet-control.map-title {
transform: translate(-50%,20%);
position: fixed !important;
left: 50%;
text-align: center;
padding-left: 10px;
padding-right: 10px;
background: rgba(255,255,255,0.75);
font-weight: bold;
font-size: 24px;
}
"))
title <- tags$div(
tag.map.title, HTML(paste("IPNW Correlation, Climate vs. ", predictor_var, " by County for ", climate_var, sep=""))
)
# County centroids, used to place the text-only labels.
lat_long <- coordinates(counties3)
#labels <- paste(counties3$MONTHCOMBO, as.character(round(counties3$CORRELATION, 2)), sep = "<br/>")
#counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
# Plain data.frame copy of the attribute table (drops the spatial slots).
counties3a <- data.frame(counties3)
# Per-county HTML label: rounded correlation over the month-combo code.
labs <- lapply(seq(nrow(counties3a)), function(i) {
paste0(as.character(round(counties3a[i, "CORRELATION"],2)), '<br/>',
counties3a[i, "MONTHCOMBO"])
})
# Choropleth: the fill uses `pal` (reversed ramp) while the legend uses
# `pal2` with the value cuts re-sorted in decreasing order, so the legend
# colors line up with the reversed fill ramp.
map <- leaflet(data = counties3) %>%
addProviderTiles(providers$Hydda.Base) %>%
addProviderTiles(providers$Stamen.TonerLines) %>%
#addControl(title, position = "topleft", className="map-title") %>%
fitBounds(exte[1], exte[3], exte[2], exte[4]) %>% addPolygons(fillColor = ~pal(counties3$CORRELATION), fillOpacity = .8, weight = 2, stroke = TRUE, smoothFactor = 0.5, color = "black") %>%
addLabelOnlyMarkers(data = counties3, lng = lat_long[,1], lat = lat_long[,2], label = lapply(labs, HTML), labelOptions = labelOptions(noHide = TRUE, direction = 'middle', textOnly = TRUE, textsize = "14px", col = "white", style=list(
'font-family'= 'serif',
'font-style'= 'bold',
'font-size' = '14px'
))) %>%
addLegend(pal = pal2, values = counties3$CORRELATION, labels = c("1", "2"), opacity = .5, title = paste("Correlation", " Matrix", sep="<br>"),
position = "bottomright", labFormat = labelFormat(transform = function(x) sort(x, decreasing = TRUE)))
# Render the widget.
map
library(maptools)
library(RColorBrewer)
library(leaflet)
library(raster)
library(RCurl)
# Upper-panel function for pairs(): prints |r| between x and y, with text
# size proportional to the correlation strength.
#
# Arguments:
#   x, y    - numeric vectors of the panel's two variables.
#   digits  - significant digits for the printed correlation.
#   prefix  - string prepended to the printed value.
#   cex.cor - optional base text size; defaults to filling the panel width.
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...)
{
  # Save the user coordinate system and restore it on exit.
  # Bug fix: the original on.exit(par(usr)) passed the saved vector unnamed,
  # which does not restore the "usr" parameter; it must be par(usr = usr).
  usr <- par("usr"); on.exit(par(usr = usr), add = TRUE)
  par(usr = c(0, 1, 0, 1))
  r <- abs(cor(x, y))
  # format() against a reference value keeps a stable digit count.
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)
  if (missing(cex.cor)) cex.cor <- 0.8 / strwidth(txt)
  text(0.5, 0.5, txt, cex = cex.cor * r)
}
# Analysis parameters: commodity and damage cause of interest.
var_commodity <- "WHEAT"
var_damage <- "Drought"
#load wheat pricing
# Annual wheat prices 1989-2015, fetched from the dissertation repository.
wheatprice <- read.csv(text=RCurl::getURL
("https://raw.githubusercontent.com/erichseamon/seamon_dissertation/master/data/wheat_prices/wheat_prices_1989_2015.csv"), header = TRUE, strip.white = TRUE, sep = ",")
# Drop the first (index) column; the remaining two are year and price.
wheatprice <- wheatprice[-1]
colnames(wheatprice) <- c("year", "price")
#--accessing output of design matrix/time lag data based on monthly selection from dashboard runs
# Download and unpack the pre-computed climate-output CSVs into /tmp.
URL <- "https://raw.githubusercontent.com/erichseamon/seamon_dissertation/master/data/climate_outputs/climate_outputs.zip"
destfile <- "/tmp/climate_outputs.zip"
download.file(URL, destfile)
outDir<-"/tmp/climate_outputs"
unzip(destfile,exdir=outDir)
# NOTE(review): setwd() in scripts is fragile; kept for compatibility.
setwd("/tmp/climate_outputs")
# Read one optimized climate/loss output CSV and rename its 9th column to
# "<climate_var>_zscore", where the climate variable name is taken from the
# 2nd column header. Replaces 16 copies of the same read/rename pattern.
read_climate_output <- function(fname) {
  v <- read.csv(fname)
  colnames(v)[9] <- paste0(colnames(v)[2], "_zscore")
  v
}
var1   <- read_climate_output("pr_jun2_cube_root_loss_climatecorrelation.csv")
var2   <- read_climate_output("pet_jul3_cube_root_loss_climatecorrelation.csv")
var3   <- read_climate_output("tmmx_jul2_cube_root_loss_climatecorrelation.csv")
var4   <- read_climate_output("pr_jun2_cube_root_acres_climatecorrelation.csv")
var5   <- read_climate_output("pet_jun2_loss_per_acre_climatecorrelation.csv")
var6   <- read_climate_output("tmmx_jun1_cube_root_acres_climatecorrelation.csv")
var7   <- read_climate_output("pr_sep5_loss_climatedata.csv")
var8   <- read_climate_output("pet_sep5_loss_climatedata.csv")
var9   <- read_climate_output("tmmx_jul2_loss_climatedata.csv")
var9x  <- read_climate_output("fm100_jul3_cube_root_loss_climatedata.csv")
var10x <- read_climate_output("fm1000_aug2_cube_root_loss_climatedata.csv")
var11x <- read_climate_output("pdsi_sep4_cube_root_loss_climatedata.csv")
var12x <- read_climate_output("pdsi_sep4_cube_root_loss_climatecorrelation.csv")
var7a  <- read_climate_output("pr_sep5_loss_climatecorrelation.csv")
var8a  <- read_climate_output("pet_sep5_loss_climatecorrelation.csv")
var9a  <- read_climate_output("tmmx_jul2_loss_climatecorrelation.csv")
# Column 9 of each var* frame is the z-score column renamed when the files
# were read; column 2 is the climate variable's value column.
data1 <- cbind(var1, var2[9], var3[9])
data2 <- cbind(var1[1:6], var2[2], var3[2])
data3 <- cbind(var4[1:6], var5[2], var6[2])
# NOTE(review): wheatprice_year is not created in this section -- presumably
# built earlier in the file from wheatprice; confirm it exists before this
# line runs.
data3 <- plyr::join(data3, wheatprice_year, by = "year")
# Assemble the optimized climate/loss dataset: pr-based frame plus the value
# columns of the other climate variables and the pdsi correlation column.
data4 <- cbind(var7[1:6], var8[2], var9[2], var9x[2], var10x[2], var11x[2], var12x[3] )
#data4$prpet <- data4$pr / data4$pet
data4a <- dplyr::left_join(data4, var7a, by = c("year" = "year", "county" = "county"))
data4a <- dplyr::left_join(data4a, wheatprice, by = c("year"))
# Drop rows with any missing values before modeling.
data4aa <- na.omit(data4a)
# NOTE(review): positional renaming -- fragile if upstream column order
# changes; verify against the joined frame's actual columns.
colnames(data4aa) <- c("X", "pr", "year", "pr_zscore", "damagecause", "county", "pet", "tmmx", "fm100", "fm1000", "pdsi", "cube_loss", "X.y", "pr2", "loss", "state", "commodity", "matrixnumber", "clim_zscore", "loss_zscore", "price")
# Aridity proxy: precipitation over potential evapotranspiration.
data4aa$prpet <- data4aa$pr / data4aa$pet
# Keep the modeling columns only.
data4aa <- subset(data4aa, , c(year, damagecause, county, commodity, state, matrixnumber, cube_loss, pr, pdsi, pet, tmmx, fm100, fm1000, price, prpet))
#write.csv(data4aa, file = "/dmine/data/USDA/agmesh-scenarios/Allstates/summaries/lag_palouse1.csv")
# Centered/scaled versions of the main predictors for modeling.
data4aa$pr_scaled <- scale(data4aa$pr, scale = TRUE, center = TRUE)
data4aa$tmmx_scaled <- scale(data4aa$tmmx, scale = TRUE, center = TRUE)
data4aa$pet_scaled <- scale(data4aa$pet, scale = TRUE, center = TRUE)
data4aa$pdsi_scaled <- scale(data4aa$pdsi, scale = TRUE, center = TRUE)
data4aa$price_scaled <- scale(data4aa$price, scale = TRUE, center = TRUE)
#percentage acreage by county and year, per state
library(plotrix)
library(ggplot2)
library(gridExtra)
# Read one state-year IPNW climatology summary file.
read_ipnw_summary <- function(state, yr) {
  read.csv(paste0("/tmp/climatology/", state, "_", yr, "_ipnw_summary"),
           header = TRUE, strip.white = TRUE, sep = ",")
}
# Create ID2001 ... WA2015 under their original names so downstream code
# that references individual year objects keeps working.
# Replaces 45 copies of the same read.csv call.
for (yr in 2001:2015) {
  assign(paste0("ID", yr), read_ipnw_summary("Idaho", yr))
  assign(paste0("OR", yr), read_ipnw_summary("Oregon", yr))
  assign(paste0("WA", yr), read_ipnw_summary("Washington", yr))
}
# For each state: stack the 15 annual summaries, total within each
# county-year, then average across years to get one row per county.
ziggy.df <- do.call(rbind, mget(paste0("ID", 2001:2015)))
ID1 <- aggregate(. ~ countyfips, aggregate(. ~ countyfips + year, ziggy.df, sum), mean)
ziggy.df <- do.call(rbind, mget(paste0("WA", 2001:2015)))
WA1 <- aggregate(. ~ countyfips, aggregate(. ~ countyfips + year, ziggy.df, sum), mean)
ziggy.df <- do.call(rbind, mget(paste0("OR", 2001:2015)))
OR1 <- aggregate(. ~ countyfips, aggregate(. ~ countyfips + year, ziggy.df, sum), mean)
# FIPS lookup table: county code -> county name and state.
countiesfips <- read.csv(text = getURL
("https://raw.githubusercontent.com/erichseamon/seamon_dissertation/master/data/counties/counties_fips.csv"),
header = TRUE, strip.white = TRUE, sep = ",")
colnames(countiesfips) <- c("countyfips", "county", "state")
# Attach county names/states to each state's climatology table.
OR2 <- merge(countiesfips, OR1, by = "countyfips")
ID2 <- merge(countiesfips, ID1, by = "countyfips")
WA2 <- merge(countiesfips, WA1, by = "countyfips")
# Read one year of raw RMA insurance-loss data (pipe-delimited, no header).
read_rma_year <- function(yr) {
  read.csv(paste0("/tmp/RMA_originaldata/", yr, ".txt"),
           sep = "|", header = FALSE, strip.white = TRUE)
}
# Create rma1989 ... rma2015 under their original names so downstream code
# that references individual year objects keeps working.
# Replaces 27 copies of the same read.csv call.
for (yr in 1989:2015) {
  assign(paste0("rma", yr), read_rma_year(yr))
}
#combine the individual yearly files from 1989 to 2000 into one claims table
# (this era's file layout has no claimed-acreage column; compare the
# 2001-2015 block below, which also keeps column 16 as "acres")
RMA_1989 <- rbind(rma1989, rma1990, rma1991, rma1992, rma1993, rma1994, rma1995, rma1996, rma1997, rma1998, rma1999, rma2000)
#filter columns: keep the positional columns of interest directly instead of
#rebuilding the frame column-by-column with data.frame()
RMA_1989 <- RMA_1989[, c(1, 3, 5, 7, 12, 13, 14, 15)]
#set column names
colnames(RMA_1989) <- c("year", "state", "county", "commodity", "damagecause", "monthcode", "month", "loss")
#combine the individual yearly files from 2001 to 2015 into one claims table
# (this layout includes claimed acreage in column 16)
RMA <- rbind(rma2001, rma2002, rma2003, rma2004, rma2005, rma2006, rma2007, rma2008, rma2009, rma2010, rma2011, rma2012, rma2013, rma2014, rma2015)
#filter columns: keep the positional columns of interest directly instead of
#rebuilding the frame column-by-column with data.frame()
RMA <- RMA[, c(1, 3, 5, 7, 12, 13, 14, 15, 16)]
#set column names
colnames(RMA) <- c("year", "state", "county", "commodity", "damagecause", "monthcode", "month", "acres", "loss")
#subset 2001 to 2015 for ID, WA, and OR
RMA_PNW <- subset(RMA, state %in% c("WA", "OR", "ID"))
#subset 1989 to 2000 for ID, WA, and OR
RMA_PNW_1989 <- subset(RMA_1989, state %in% c("WA", "OR", "ID"))
#temp = list.files(pattern = paste("Idaho", "_summary", sep=""))
#myfiles = lapply(temp, read.csv, header = TRUE)
#ziggy.df <- do.call(rbind , myfiles)
RMA_PNW <- as.data.frame(RMA_PNW)
# Coerce the key text columns to character so trimming and matching behave
# predictably regardless of read.csv's factor handling.
RMA_PNW$county <- as.character(RMA_PNW$county)
RMA_PNW$damagecause <- as.character(RMA_PNW$damagecause)
# Snapshot taken before whitespace trimming.
# NOTE(review): `xrange` does not appear to be used again in this section.
xrange <- RMA_PNW
# Strip stray padding from the columns used for subsetting and merging.
for (text_col in c("commodity", "county", "damagecause")) {
  RMA_PNW[[text_col]] <- trimws(RMA_PNW[[text_col]])
}
# Per-state claim tables for the per-state aggregation below.
ID_RMA_PNW <- subset(RMA_PNW, state == "ID")
OR_RMA_PNW <- subset(RMA_PNW, state == "OR")
WA_RMA_PNW <- subset(RMA_PNW, state == "WA")
# Per-state claim aggregation, previously copy-pasted three times for OR, WA
# and ID; factored into one helper. Reads the globals `var_commodity` and
# `var_damage` (set elsewhere in this document), exactly as the inline code did.
#
# state_claims  - one state's claim records (e.g. OR_RMA_PNW)
# state_climate - that state's county-level climate table (e.g. OR2);
#                 column 4 is dropped before merging, as in the original.
#
# Returns a named list with every intermediate the original code created:
#   commodity    - claims for the chosen commodity (all damage causes)
#   all          - total claimed acres per county-year, all damage causes
#   loss1        - claims for the chosen commodity AND damage cause
#   loss2        - dollar loss, acres and loss-per-acre per county-year
#   loss2_acres  - the acreage aggregate on its own
#   climate      - loss2 merged with the climate table
#   climate2     - climate plus the all-damage-cause acreage totals
summarize_state_loss <- function(state_claims, state_climate) {
  #--acres for all damage causes aggregated
  commodity_claims <- subset(state_claims, commodity == var_commodity)
  all_acres <- aggregate(commodity_claims$acres,
                         by = list(commodity_claims$year, commodity_claims$county, commodity_claims$state),
                         FUN = "sum")
  colnames(all_acres) <- c("year", "county", "state", "all_damagecause_acres")
  #--loss and acres for the single damage cause of interest
  cause_claims <- subset(state_claims, commodity == var_commodity & damagecause == var_damage)
  loss_sum <- aggregate(cause_claims$loss,
                        by = list(cause_claims$year, cause_claims$county, cause_claims$state),
                        FUN = "sum")
  acre_sum <- aggregate(cause_claims$acres,
                        by = list(cause_claims$year, cause_claims$county, cause_claims$state),
                        FUN = "sum")
  colnames(loss_sum) <- c("year", "county", "state", "loss")
  colnames(acre_sum) <- c("year", "county", "state", "acres")
  loss_sum$acres <- acre_sum$acres
  loss_sum$lossperacre <- loss_sum$loss / loss_sum$acres
  climate <- merge(loss_sum, state_climate[-4], by = c("county", "state"))
  climate2 <- merge(all_acres, climate, by = c("year", "county", "state"))
  list(commodity = commodity_claims, all = all_acres, loss1 = cause_claims,
       loss2 = loss_sum, loss2_acres = acre_sum, climate = climate,
       climate2 = climate2)
}
#--OR
or_res <- summarize_state_loss(OR_RMA_PNW, OR2)
OR_loss_commodity <- or_res$commodity
OR_loss_all <- or_res$all
OR_loss1 <- or_res$loss1
OR_loss2 <- or_res$loss2
OR_loss2_acres <- or_res$loss2_acres
OR_loss_climate <- or_res$climate
OR_loss_climate_2 <- or_res$climate2
#-WA
wa_res <- summarize_state_loss(WA_RMA_PNW, WA2)
WA_loss_commodity <- wa_res$commodity
WA_loss_all <- wa_res$all
WA_loss1 <- wa_res$loss1
WA_loss2 <- wa_res$loss2
WA_loss2_acres <- wa_res$loss2_acres
WA_loss_climate <- wa_res$climate
WA_loss_climate_2 <- wa_res$climate2
#-ID
id_res <- summarize_state_loss(ID_RMA_PNW, ID2)
ID_loss_commodity <- id_res$commodity
ID_loss_all <- id_res$all
ID_loss1 <- id_res$loss1
ID_loss2 <- id_res$loss2
ID_loss2_acres <- id_res$loss2_acres
ID_loss_climate <- id_res$climate
ID_loss_climate_2 <- id_res$climate2
# All three states combined; pct_acreage = share of the commodity's claimed
# acres attributable to the chosen damage cause.
merged_iPNW <- rbind(ID_loss_climate_2, WA_loss_climate_2, OR_loss_climate_2)
merged_iPNW$pct_acreage <- merged_iPNW$acres / merged_iPNW$all_damagecause_acres
#
#--plotting results of individual variables
pairs(cube_loss ~ pr_scaled + tmmx_scaled + pet_scaled + pdsi_scaled,
      data = data4aa, col = data4aa$state,
      lower.panel = panel.smooth, upper.panel = panel.cor,
      main = "initial pairs plot")
par(mar = c(12.7, 6.1, 4, 2.1), family = 'serif', mgp = c(4, 1, 0), las = 0)
data4aaaa <- data4aa
# Serif axis styling shared by every climate-vs-loss scatterplot below.
# Previously this theme(...) chain was repeated inline on each plot; ggplot2
# themes merge element-wise when added, so one combined object is equivalent.
serif_axis_theme <- theme(
  axis.title.y = element_text(family = "Serif", size = 16),
  axis.title.x = element_text(family = "Serif", size = 16),
  axis.text.x = element_text(size = rel(1.5), hjust = 1, family = "Serif"),
  axis.text.y = element_text(size = rel(1.5), hjust = 1, family = "Serif")
)
# Each panel: points colored by state, sized by commodity price, with a
# single loess smooth pooled across all states (group = 1).
ggplot(data4aaaa, aes(pet_scaled, cube_loss, color = state)) +
  geom_point(aes(size = price)) +
  stat_smooth(aes(group = 1), method = "loess") +
  labs(x = "Scaled Potential Evapotranspiration (mm)", y = "Cube root loss ($)") +
  serif_axis_theme
ggplot(data4aaaa, aes(pr_scaled, cube_loss, color = state)) +
  geom_point(aes(size = price)) +
  stat_smooth(aes(group = 1), method = "loess") +
  labs(x = "Scaled Precipitation (mm)", y = "Cube root loss ($)") +
  serif_axis_theme
ggplot(data4aaaa, aes(tmmx_scaled, cube_loss, color = state)) +
  geom_point(aes(size = price)) +
  stat_smooth(aes(group = 1), method = "loess") +
  labs(x = "Scaled Max Temperature (C)", y = "Cube root loss ($)") +
  serif_axis_theme
ggplot(data4aaaa, aes(pdsi_scaled, cube_loss, color = state)) +
  geom_point(aes(size = price)) +
  stat_smooth(aes(group = 1), method = "loess") +
  labs(x = "Scaled PDSI", y = "Cube root loss ($)") +
  serif_axis_theme
ggplot(data4aaaa, aes(prpet, cube_loss, color = state)) +
  geom_point(aes(size = price)) +
  stat_smooth(aes(group = 1), method = "loess") +
  labs(x = "Aridity Index (PR / PET)", y = "Cube root loss ($)") +
  serif_axis_theme
library(maptools)
library(RColorBrewer)
library(leaflet)
library(raster)
library(RCurl)
# Make big tree
# Candidate model formulas. (The as.formula() wrappers were redundant: a
# `~` literal is already a formula object.)
form <- loss_zscore ~ pr_zscore + tmmx_zscore + pet_zscore
form2 <- cube_loss ~ pr + tmmx + pet + pdsi + price
form2a <- loss ~ pr + tmmx + pet + pdsi + price
form2b <- cube_loss ~ pr.x + tmmx.x + pet.x + pdsi.x + price
#form3 <- as.formula(pct_acreage ~ pr + tmmx + pet + pdsi + sph + srad + vs + th + erc + fm100 + fm1000 + price)
form3 <- pct_acreage ~ pr + tmmx + pet + pdsi + price
form3a <- pct_acreage ~ pr + tmmx + pet + pdsi + sph + srad + vs + th + erc + fm100 + fm1000 + price
# Attach the aggregated loss/acreage table to the optimized climate data.
data44a <- merge(merged_iPNW, data.frame(data4aaaa), by = c("state", "county", "year"))
# Keep the identifying columns on their own for later re-attachment.
data5a_statecountyear <- data.frame(state = data44a$state,
                                    county = data44a$county,
                                    year = data44a$year)
# Kelvin -> Celsius. NOTE(review): uses 273 rather than 273.15; kept as-is.
data44a$tmmx.x <- data44a$tmmx.x - 273
# Modeling table with short predictor names (scaled climate variables).
data5a <- data.frame(
  year = data44a$year, cube_loss = data44a$cube_loss, loss = data44a$loss,
  pr = data44a$pr_scaled, tmmx = data44a$tmmx_scaled,
  fm100 = data44a$fm100.x, fm1000 = data44a$fm1000.x,
  pdsi = data44a$pdsi_scaled, erc = data44a$erc, srad = data44a$srad,
  vs = data44a$vs, sph = data44a$sph, th = data44a$th,
  pet = data44a$pet_scaled, pct_acreage = data44a$pct_acreage,
  price = data44a$price_scaled, state = data44a$state
)
# Work with log10 loss from here on.
data5a$loss <- log10(data5a$loss)
# Reduced predictor set for the loss models (loss already log10-transformed).
data5ab <- data.frame(loss = data5a$loss, pr = data5a$pr, tmmx = data5a$tmmx,
                      pdsi = data5a$pdsi, pet = data5a$pet,
                      price = data5a$price)
#data5ab$loss <- log10(data5ab$loss)
# load libraries
library(caret)
library(rpart)
# define training control: repeated 10-fold cross-validation, retaining the
# per-fold hold-out predictions
train_control <- trainControl(method = "repeatedcv", number = 10, savePredictions = TRUE)
# Modeling copies with incomplete rows removed.
data5b <- na.omit(data.frame(data5a))
data5ab <- na.omit(data.frame(data5ab))
#pairs(loss ~ pr + tmmx + pet + pdsi, data = data5b, col = data5b$state,
#  lower.panel=panel.smooth, upper.panel=panel.cor, main = "initial pairs plot")
# train the model
set.seed(9992)
#trainIndex <- sample(1:nrow(data5b), 0.8 * nrow(data5b))
# 75/25 train/test split, stratified on the respective outcome.
trainIndex <- createDataPartition(data5b$pct_acreage, p = .75, list = FALSE)
train <- data5b[trainIndex,]
test <- data5b[-trainIndex,]
trainIndex_loss <- caret::createDataPartition(data5ab$loss, p = .75, list = FALSE)
train_loss <- data5ab[trainIndex_loss,]
test_loss <- data5ab[-trainIndex_loss,]
#traindata=data.frame(x=train_loss,y=(train_loss))
#testdata = data.frame(x=test_loss,y=(test_loss))
rf_grid <- expand.grid(mtry = 2:5) # you can put different values for mtry
# BUG FIX: randomForest's argument is `ntree`, not `ntrees`. The misspelled
# `ntrees` was silently absorbed by `...` and ignored, so every forest fell
# back to the package default; spelled correctly, the requested tree counts
# now take effect.
model <- caret::train(form3, data = train, trControl = train_control, importance = TRUE, method = "rf", tuneGrid = rf_grid, ntree = 500)
model_singular <- caret::train(form3, data = train, trControl = train_control, method = "rpart")
model_loss <- caret::train(form2a, data = train_loss, trControl = train_control, method = "rf", tuneGrid = rf_grid, ntree = 1000, importance = TRUE)
model_loss_all <- caret::train(form2a, data = data5b, trControl = train_control, method = "rf", ntree = 500, tuneGrid = rf_grid, importance = TRUE)
model_loss_all_acreage <- caret::train(form3, data = data5b, trControl = train_control, method = "rf", ntree = 500, tuneGrid = rf_grid, importance = TRUE)
#cforest_model_loss <- cforest(form2a,
#  data = train_loss,
#  controls=cforest_unbiased(ntree=2000, mtry=5))
#lattice::densityplot(model_loss,
#  adjust = 1.25)
# BUG FIX: `finalModel$err.rate` only exists for classification forests;
# model_loss is a regression forest, whose per-tree OOB error is stored in
# `$mse` (consistent with the which.min(model_loss$finalModel$mse) call
# below). Indexing err.rate here raised an error.
tree_num <- which(model_loss$finalModel$mse == min(model_loss$finalModel$mse))
# Learning-curve data for the pct_acreage model. (`learing_curve_dat` is
# caret's actual exported name — the typo is in the package itself.)
lda_data <- learing_curve_dat(dat = model$trainingData,
                              outcome = ".outcome",
                              ## `train` arguments:
                              metric = "RMSE",
                              trControl = train_control,
                              method = "rf")
## Training for 10% (n = 26)
## Training for 20% (n = 52)
## Training for 30% (n = 79)
## Training for 40% (n = 105)
## Training for 50% (n = 132)
## Training for 60% (n = 158)
## Training for 70% (n = 184)
## Training for 80% (n = 211)
## Training for 90% (n = 237)
## Training for 100% (n = 264)
# Tidy y-axis break points spanning the observed RMSE values.
pts <- pretty(lda_data$RMSE)
#pts <- c(0,0.1, 0.2, 0.3, 0.4)
# Rename the resampling series so the legend reads "Validation".
lda_data$Data[lda_data$Data == "Resampling"] <- "Validation"
ggplot(lda_data, aes(x = Training_Size, y = RMSE, color = Data)) +
  geom_smooth(method = loess, span = .8) +
  theme_bw() +
  ggtitle("Random Forest Learning Curves: Train vs. Validation") +
  theme(axis.title.y = element_text(family = "Serif", size = 18),
        axis.title.x = element_text(family = "Serif", size = 18),
        axis.text.x = element_text(size = rel(1.9), angle = 90, hjust = 1, family = "Serif"),
        axis.text.y = element_text(size = rel(1.9), hjust = 1, family = "Serif")) +
  theme(plot.title = element_text(family = "Serif", vjust = 2)) +
  theme(legend.text = element_text(family = "Serif", size = 14)) +
  theme(legend.title = element_text(family = "Serif", size = 16, face = "bold")) +
  theme(plot.title = element_text(size = 24, face = "bold")) +
  scale_y_continuous(labels = pts, breaks = pts) +
  xlab("Training Size") +
  ylab("RMSE") +
  theme(legend.position = "bottom") +
  scale_fill_discrete(name = "Legend")
# OOB RMSE at the best tree count (printed output kept from the original run).
sqrt(min(model_loss$finalModel$mse))
## [1] 0.7163278
# Tree index with the lowest OOB MSE.
which.min(model_loss$finalModel$mse)
## [1] 479
# Unscaled permutation importance, ordered most- to least-important.
importance <- varImp(model_loss, scale = FALSE)
importance2 <- importance$importance
importance2$variable <- rownames(importance2)
importance2 <- importance2[order(importance2$Overall, decreasing = TRUE), ]
par(mar = c(6, 7, 2, 3), family = 'serif', mgp = c(5, 1, 0), las = 0)
# Horizontal bars, least important at the bottom; rev() re-aligns the
# descending-ordered labels with the ascending sorted values.
barplot(sort(importance2$Overall), horiz = TRUE, col = "lightblue",
        ylab = "predictor variables", xlab = "% importance",
        cex.lab = 1.7, las = 2, cex.axis = 1.8, cex.names = 1.8,
        names.arg = rev(importance2$variable))
#NRMSE
# Normalized root-mean-square error between observed and predicted values.
#
# obs  - numeric vector of observations
# pred - numeric vector of predictions, same length as obs
# type - normalization for the RMSE: "sd" (default, standard deviation of
#        obs), "mean", "maxmin" (range of obs) or "iq" (interquartile range)
#
# Returns the RMSE divided by the chosen normalizer, rounded to 3 decimals.
# An unknown `type` now fails fast via match.arg() instead of printing
# "Wrong type!" and then crashing on an undefined `nrmse`.
nrmse_func <- function(obs, pred, type = "sd") {
  type <- match.arg(type, c("sd", "mean", "maxmin", "iq"))
  rmse <- sqrt(mean((obs - pred)^2))
  denom <- switch(type,
    sd = sd(obs),
    mean = mean(obs),
    maxmin = max(obs) - min(obs),
    iq = quantile(obs, 0.75) - quantile(obs, 0.25)
  )
  round(rmse / denom, 3)
}
#ensemble: fit several learner families on the same resampling folds
library(caretEnsemble)
#algorithmList <- c('gbm', 'rpart', 'ctree', 'rf', 'HYFIS', 'FS.HGD', 'ANFIS' )
algorithmList <- c("gbm", "rpart", "ctree", "rf")
set.seed(100)
models <- caretList(form2, data = data5b, trControl = train_control, methodList = algorithmList)
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 45571.9967 -nan 0.1000 1347.8847
## 2 43706.1157 -nan 0.1000 1473.7796
## 3 42226.9439 -nan 0.1000 933.0998
## 4 41057.1838 -nan 0.1000 1054.8961
## 5 40036.4588 -nan 0.1000 746.8285
## 6 38827.0407 -nan 0.1000 1072.6552
## 7 37822.7236 -nan 0.1000 783.4970
## 8 37099.1738 -nan 0.1000 717.0261
## 9 36431.3838 -nan 0.1000 283.0984
## 10 35924.4774 -nan 0.1000 249.2798
## 20 31157.0867 -nan 0.1000 -116.8874
## 40 28225.5251 -nan 0.1000 -109.2071
## 60 27181.9690 -nan 0.1000 -90.3829
## 80 26374.5114 -nan 0.1000 -29.7355
## 100 25522.8726 -nan 0.1000 -127.3438
## 120 24878.5051 -nan 0.1000 -113.4964
## 140 24283.1354 -nan 0.1000 -97.6861
## 150 24055.7699 -nan 0.1000 -78.9351
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 44185.0623 -nan 0.1000 2953.7198
## 2 41611.0650 -nan 0.1000 2339.1270
## 3 39369.8265 -nan 0.1000 2040.5440
## 4 37446.5200 -nan 0.1000 1838.6510
## 5 35922.9177 -nan 0.1000 1237.2045
## 6 34402.5021 -nan 0.1000 885.3945
## 7 33096.8173 -nan 0.1000 999.2937
## 8 32197.6779 -nan 0.1000 546.7953
## 9 31190.0078 -nan 0.1000 651.8482
## 10 30491.3652 -nan 0.1000 722.5687
## 20 26739.7081 -nan 0.1000 137.5739
## 40 23120.5943 -nan 0.1000 -383.4063
## 60 21247.2739 -nan 0.1000 -62.3426
## 80 19683.3442 -nan 0.1000 -109.9295
## 100 18017.2047 -nan 0.1000 -149.0337
## 120 16793.3256 -nan 0.1000 -157.3972
## 140 16088.8012 -nan 0.1000 -54.3143
## 150 15766.3020 -nan 0.1000 -102.4167
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 44070.5813 -nan 0.1000 2963.5876
## 2 41216.2205 -nan 0.1000 2033.6613
## 3 38967.6054 -nan 0.1000 1766.3968
## 4 37234.2776 -nan 0.1000 1170.2250
## 5 35207.8928 -nan 0.1000 1769.7125
## 6 33495.2884 -nan 0.1000 1245.8306
## 7 32280.8803 -nan 0.1000 931.3760
## 8 31133.3247 -nan 0.1000 585.1639
## 9 30333.9086 -nan 0.1000 685.1851
## 10 29757.1657 -nan 0.1000 72.4121
## 20 25082.3239 -nan 0.1000 -60.7711
## 40 20432.9142 -nan 0.1000 -135.6398
## 60 17726.1503 -nan 0.1000 -166.1295
## 80 15840.0190 -nan 0.1000 -225.1396
## 100 14296.4368 -nan 0.1000 -40.5236
## 120 13037.5497 -nan 0.1000 -155.7841
## 140 12271.6360 -nan 0.1000 -21.9658
## 150 11782.2423 -nan 0.1000 -169.3482
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 49820.0430 -nan 0.1000 1944.1522
## 2 47995.6635 -nan 0.1000 1720.9042
## 3 46197.4986 -nan 0.1000 1428.4555
## 4 44717.6850 -nan 0.1000 1179.4257
## 5 43388.9852 -nan 0.1000 1023.4499
## 6 42262.9960 -nan 0.1000 793.3409
## 7 41223.4931 -nan 0.1000 586.2131
## 8 40359.1044 -nan 0.1000 527.2202
## 9 39487.7539 -nan 0.1000 900.8536
## 10 38714.6003 -nan 0.1000 449.8300
## 20 33774.7019 -nan 0.1000 205.4066
## 40 29897.3338 -nan 0.1000 -71.6875
## 60 28539.4470 -nan 0.1000 -167.2461
## 80 27614.5896 -nan 0.1000 -77.5812
## 100 26575.7723 -nan 0.1000 3.3054
## 120 25802.7776 -nan 0.1000 -73.3432
## 140 25007.5663 -nan 0.1000 -18.1882
## 150 24656.0787 -nan 0.1000 -76.9476
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 49245.4193 -nan 0.1000 1552.7467
## 2 46525.2431 -nan 0.1000 1994.4411
## 3 43876.5626 -nan 0.1000 2447.1646
## 4 41877.0815 -nan 0.1000 2029.6277
## 5 40338.5542 -nan 0.1000 1625.4020
## 6 38590.2914 -nan 0.1000 1570.3231
## 7 37038.8184 -nan 0.1000 1126.2944
## 8 36150.6687 -nan 0.1000 666.6455
## 9 35138.5735 -nan 0.1000 669.4421
## 10 34205.2666 -nan 0.1000 638.4603
## 20 28000.7642 -nan 0.1000 143.6927
## 40 23684.2034 -nan 0.1000 106.0152
## 60 21269.8980 -nan 0.1000 94.1118
## 80 19723.9114 -nan 0.1000 -107.9941
## 100 18198.9753 -nan 0.1000 -171.8609
## 120 17033.4741 -nan 0.1000 -198.6796
## 140 16038.6785 -nan 0.1000 -52.4454
## 150 15615.6507 -nan 0.1000 -78.9264
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 47851.6317 -nan 0.1000 3568.1534
## 2 44385.5476 -nan 0.1000 2176.7334
## 3 41505.2606 -nan 0.1000 2394.2301
## 4 39199.2061 -nan 0.1000 1619.8689
## 5 37878.0577 -nan 0.1000 725.9094
## 6 35567.1437 -nan 0.1000 1063.3310
## 7 34246.3304 -nan 0.1000 1087.2448
## 8 33184.6546 -nan 0.1000 616.6300
## 9 32051.9341 -nan 0.1000 417.1295
## 10 30754.0509 -nan 0.1000 560.2631
## 20 25191.4991 -nan 0.1000 162.4201
## 40 20706.3906 -nan 0.1000 -246.4121
## 60 17362.4661 -nan 0.1000 298.4624
## 80 15394.9053 -nan 0.1000 -90.3433
## 100 13863.7916 -nan 0.1000 -124.0408
## 120 12649.7822 -nan 0.1000 -278.2822
## 140 11600.7768 -nan 0.1000 -131.9622
## 150 11157.7773 -nan 0.1000 25.2477
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 51864.4320 -nan 0.1000 1974.3892
## 2 50140.9346 -nan 0.1000 1465.6597
## 3 48422.3850 -nan 0.1000 1137.2598
## 4 47015.4662 -nan 0.1000 1158.7479
## 5 45681.6114 -nan 0.1000 1025.4910
## 6 44408.3090 -nan 0.1000 -23.0852
## 7 43255.4710 -nan 0.1000 770.0290
## 8 42430.4781 -nan 0.1000 757.5815
## 9 41760.0340 -nan 0.1000 561.5739
## 10 40969.3087 -nan 0.1000 184.5594
## 20 36529.4554 -nan 0.1000 194.4991
## 40 33769.0520 -nan 0.1000 53.6747
## 60 32239.6848 -nan 0.1000 -359.1541
## 80 30848.3755 -nan 0.1000 -35.9471
## 100 29671.8747 -nan 0.1000 -198.4972
## 120 28971.2586 -nan 0.1000 -133.9165
## 140 28144.8109 -nan 0.1000 -262.3592
## 150 27718.0092 -nan 0.1000 -188.2970
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 50676.0844 -nan 0.1000 2330.7197
## 2 48339.3231 -nan 0.1000 1570.6150
## 3 46553.5177 -nan 0.1000 1083.7795
## 4 44545.6800 -nan 0.1000 1181.0808
## 5 42804.0484 -nan 0.1000 1833.3006
## 6 41432.8633 -nan 0.1000 1290.0212
## 7 40167.7777 -nan 0.1000 597.1119
## 8 39015.7996 -nan 0.1000 891.1397
## 9 38129.1749 -nan 0.1000 -12.7496
## 10 37022.8829 -nan 0.1000 706.1514
## 20 32248.3341 -nan 0.1000 15.5678
## 40 27441.2026 -nan 0.1000 -166.0665
## 60 25095.1842 -nan 0.1000 -72.5908
## 80 23005.0404 -nan 0.1000 -92.4357
## 100 21356.2008 -nan 0.1000 -36.8375
## 120 19982.1154 -nan 0.1000 56.1846
## 140 18800.4898 -nan 0.1000 -172.8866
## 150 18121.5940 -nan 0.1000 -100.8612
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 50499.9147 -nan 0.1000 2961.5835
## 2 47637.9450 -nan 0.1000 2133.2219
## 3 45032.5800 -nan 0.1000 2214.2762
## 4 42967.5519 -nan 0.1000 1010.4827
## 5 41782.9977 -nan 0.1000 690.9631
## 6 40226.1644 -nan 0.1000 1024.6175
## 7 39028.2233 -nan 0.1000 799.9360
## 8 37830.2803 -nan 0.1000 185.3153
## 9 36618.9748 -nan 0.1000 604.2677
## 10 35746.8661 -nan 0.1000 236.7734
## 20 30077.5272 -nan 0.1000 -116.9216
## 40 22726.2573 -nan 0.1000 -29.2178
## 60 20388.6205 -nan 0.1000 79.9329
## 80 18194.2445 -nan 0.1000 -24.0247
## 100 16210.6387 -nan 0.1000 -74.5641
## 120 14837.1060 -nan 0.1000 -7.2506
## 140 13489.5962 -nan 0.1000 -88.4818
## 150 13045.7364 -nan 0.1000 -177.1660
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 51685.8297 -nan 0.1000 1915.6448
## 2 49856.8534 -nan 0.1000 1541.1501
## 3 48050.8276 -nan 0.1000 1095.9056
## 4 46661.7100 -nan 0.1000 993.9875
## 5 45110.4678 -nan 0.1000 603.9911
## 6 43407.2590 -nan 0.1000 686.6371
## 7 42157.8470 -nan 0.1000 605.2237
## 8 41369.5002 -nan 0.1000 702.1686
## 9 40587.0253 -nan 0.1000 322.3502
## 10 39736.1717 -nan 0.1000 374.2463
## 20 35686.5504 -nan 0.1000 250.2065
## 40 32946.3518 -nan 0.1000 -19.5912
## 60 31533.4954 -nan 0.1000 -37.2919
## 80 30449.1279 -nan 0.1000 -202.9466
## 100 29524.4770 -nan 0.1000 -257.7878
## 120 28674.5226 -nan 0.1000 -61.7317
## 140 27843.3906 -nan 0.1000 -23.3820
## 150 27497.7295 -nan 0.1000 -156.4054
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 50743.9863 -nan 0.1000 3287.2097
## 2 47621.7099 -nan 0.1000 2033.1226
## 3 45462.3824 -nan 0.1000 2113.3776
## 4 42968.8532 -nan 0.1000 1845.1285
## 5 41234.1718 -nan 0.1000 1672.6022
## 6 39922.7531 -nan 0.1000 777.6905
## 7 38232.6350 -nan 0.1000 983.2204
## 8 37194.5285 -nan 0.1000 586.9659
## 9 36532.3462 -nan 0.1000 -104.9967
## 10 35935.8858 -nan 0.1000 326.9653
## 20 30511.4831 -nan 0.1000 50.6200
## 40 26809.4283 -nan 0.1000 -59.2592
## 60 25078.9213 -nan 0.1000 -118.7054
## 80 22529.5604 -nan 0.1000 -204.9102
## 100 20759.1184 -nan 0.1000 -183.3041
## 120 19353.7905 -nan 0.1000 -42.0783
## 140 18037.6204 -nan 0.1000 -246.9130
## 150 17392.3890 -nan 0.1000 -69.0175
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 50298.5415 -nan 0.1000 2881.0502
## 2 47295.9871 -nan 0.1000 2548.9872
## 3 44774.2123 -nan 0.1000 2143.4224
## 4 42790.2301 -nan 0.1000 1503.6932
## 5 40836.1872 -nan 0.1000 1539.4657
## 6 38845.1884 -nan 0.1000 1110.9245
## 7 37737.9741 -nan 0.1000 737.1635
## 8 36472.2379 -nan 0.1000 915.2609
## 9 35280.4969 -nan 0.1000 772.7292
## 10 34792.3238 -nan 0.1000 5.6171
## 20 28027.5999 -nan 0.1000 6.2075
## 40 23131.1276 -nan 0.1000 -134.2211
## 60 20440.4965 -nan 0.1000 -144.1386
## 80 17513.8661 -nan 0.1000 -109.5384
## 100 15600.5567 -nan 0.1000 -108.8007
## 120 14427.7950 -nan 0.1000 -265.1689
## 140 13543.9577 -nan 0.1000 -32.5880
## 150 12852.1341 -nan 0.1000 -139.7807
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 48126.4645 -nan 0.1000 1501.0104
## 2 46333.6397 -nan 0.1000 1766.3519
## 3 44878.2794 -nan 0.1000 815.6747
## 4 43607.8743 -nan 0.1000 934.5319
## 5 42268.9662 -nan 0.1000 1139.2679
## 6 41098.7479 -nan 0.1000 865.7983
## 7 39939.7220 -nan 0.1000 911.5263
## 8 39020.5703 -nan 0.1000 632.0114
## 9 38117.2252 -nan 0.1000 809.5149
## 10 37398.4297 -nan 0.1000 259.7864
## 20 32862.1363 -nan 0.1000 130.6194
## 40 29818.5887 -nan 0.1000 67.7131
## 60 28699.8411 -nan 0.1000 -1.6072
## 80 27639.9586 -nan 0.1000 -111.4352
## 100 26690.7274 -nan 0.1000 -240.2014
## 120 26077.4213 -nan 0.1000 -108.0136
## 140 25359.1170 -nan 0.1000 -135.8198
## 150 24963.8677 -nan 0.1000 -44.6817
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 46994.3787 -nan 0.1000 2366.0756
## 2 44782.7195 -nan 0.1000 1625.6221
## 3 42630.7941 -nan 0.1000 2010.9028
## 4 40380.3140 -nan 0.1000 1907.5240
## 5 38511.5059 -nan 0.1000 1347.3762
## 6 37158.2141 -nan 0.1000 1040.7794
## 7 35944.9188 -nan 0.1000 988.5578
## 8 34918.5429 -nan 0.1000 601.4898
## 9 33907.2410 -nan 0.1000 563.7247
## 10 33129.0498 -nan 0.1000 433.7560
## 20 29021.1943 -nan 0.1000 -209.3625
## 40 25838.2679 -nan 0.1000 -117.5195
## 60 22995.6265 -nan 0.1000 86.7847
## 80 21129.8014 -nan 0.1000 -73.0168
## 100 19722.1231 -nan 0.1000 -355.7565
## 120 18483.3564 -nan 0.1000 -103.9658
## 140 17421.1086 -nan 0.1000 -129.3636
## 150 17071.7623 -nan 0.1000 -114.5686
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 46583.1900 -nan 0.1000 3013.0219
## 2 43623.0206 -nan 0.1000 2192.9443
## 3 40720.1256 -nan 0.1000 2256.8098
## 4 38533.5971 -nan 0.1000 1941.2532
## 5 36799.8214 -nan 0.1000 1534.8973
## 6 34950.5173 -nan 0.1000 1249.4438
## 7 33560.2806 -nan 0.1000 1054.6028
## 8 32402.4008 -nan 0.1000 516.6160
## 9 31519.8047 -nan 0.1000 231.2806
## 10 30756.4211 -nan 0.1000 257.5617
## 20 25891.9783 -nan 0.1000 -240.3685
## 40 22095.3850 -nan 0.1000 262.8500
## 60 18425.8008 -nan 0.1000 -164.1895
## 80 16429.0818 -nan 0.1000 12.8938
## 100 14747.9318 -nan 0.1000 -118.2120
## 120 13549.6807 -nan 0.1000 -95.2072
## 140 12588.2735 -nan 0.1000 -153.4402
## 150 12136.6014 -nan 0.1000 -111.2775
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 50540.4276 -nan 0.1000 1122.2727
## 2 48809.0721 -nan 0.1000 1734.7050
## 3 47228.1668 -nan 0.1000 1511.8777
## 4 45984.2442 -nan 0.1000 945.9184
## 5 44843.0796 -nan 0.1000 1090.0309
## 6 43687.9661 -nan 0.1000 927.2358
## 7 42652.2804 -nan 0.1000 543.1433
## 8 41764.9691 -nan 0.1000 727.9153
## 9 41175.1984 -nan 0.1000 243.5311
## 10 40476.1859 -nan 0.1000 237.0239
## 20 36086.0535 -nan 0.1000 -77.1676
## 40 32614.4843 -nan 0.1000 -62.6594
## 60 31140.0904 -nan 0.1000 -163.5978
## 80 29723.5330 -nan 0.1000 -276.2569
## 100 28577.7222 -nan 0.1000 -97.7706
## 120 27670.2153 -nan 0.1000 28.6113
## 140 26716.8465 -nan 0.1000 -331.2817
## 150 26247.9876 -nan 0.1000 -161.0587
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 48706.3864 -nan 0.1000 2390.7164
## 2 46080.9831 -nan 0.1000 2422.7456
## 3 43791.0307 -nan 0.1000 1645.3846
## 4 41792.3412 -nan 0.1000 1734.6349
## 5 40375.2333 -nan 0.1000 303.1323
## 6 38850.9110 -nan 0.1000 976.8961
## 7 37775.4334 -nan 0.1000 413.0602
## 8 37078.1328 -nan 0.1000 148.1620
## 9 36319.7583 -nan 0.1000 273.4414
## 10 35650.7549 -nan 0.1000 -2.0334
## 20 30371.7856 -nan 0.1000 134.4697
## 40 25595.8159 -nan 0.1000 -20.7030
## 60 23189.9930 -nan 0.1000 -167.1957
## 80 21084.4241 -nan 0.1000 -33.5400
## 100 19651.7523 -nan 0.1000 -110.3095
## 120 18386.2839 -nan 0.1000 -123.0467
## 140 17378.5990 -nan 0.1000 -37.2071
## 150 16862.1094 -nan 0.1000 -87.3071
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 48583.5167 -nan 0.1000 2289.4732
## 2 46259.7320 -nan 0.1000 1239.5100
## 3 43481.7110 -nan 0.1000 1916.4020
## 4 41348.3128 -nan 0.1000 1686.7583
## 5 39755.5499 -nan 0.1000 1178.2566
## 6 38288.2426 -nan 0.1000 1038.9520
## 7 37265.1784 -nan 0.1000 117.9867
## 8 36424.7195 -nan 0.1000 113.3802
## 9 35466.1783 -nan 0.1000 541.9288
## 10 33928.7039 -nan 0.1000 698.8113
## 20 28592.9046 -nan 0.1000 477.9799
## 40 22426.2264 -nan 0.1000 -116.1006
## 60 19145.6228 -nan 0.1000 -169.7116
## 80 17472.3659 -nan 0.1000 -63.1810
## 100 15883.9436 -nan 0.1000 -151.8144
## 120 14781.6956 -nan 0.1000 -133.8376
## 140 13625.5894 -nan 0.1000 -166.3918
## 150 13181.0354 -nan 0.1000 -181.0526
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 50106.0131 -nan 0.1000 1696.6948
## 2 48191.2897 -nan 0.1000 1840.1048
## 3 46678.1575 -nan 0.1000 1144.9227
## 4 44994.1250 -nan 0.1000 1294.7915
## 5 43859.9590 -nan 0.1000 1043.4872
## 6 42721.4887 -nan 0.1000 1008.3102
## 7 41551.5071 -nan 0.1000 887.4136
## 8 40607.1356 -nan 0.1000 563.1316
## 9 39692.4938 -nan 0.1000 689.6011
## 10 38994.9483 -nan 0.1000 425.9485
## 20 34261.0672 -nan 0.1000 20.0803
## 40 31412.4529 -nan 0.1000 1.4015
## 60 30182.8418 -nan 0.1000 -139.6747
## 80 29024.6574 -nan 0.1000 -111.9641
## 100 28060.2630 -nan 0.1000 -26.4089
## 120 27075.6381 -nan 0.1000 -121.2704
## 140 26410.0445 -nan 0.1000 -99.3217
## 150 25981.1889 -nan 0.1000 -91.5102
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 48822.2234 -nan 0.1000 3020.4579
## 2 46739.3718 -nan 0.1000 1684.9822
## 3 44863.0229 -nan 0.1000 1398.0539
## 4 42862.2758 -nan 0.1000 1814.2240
## 5 41185.2553 -nan 0.1000 1427.4025
## 6 39796.3477 -nan 0.1000 1122.1133
## 7 39197.4201 -nan 0.1000 -6.5360
## 8 37975.3088 -nan 0.1000 810.2737
## 9 37131.1218 -nan 0.1000 463.4211
## 10 35961.9594 -nan 0.1000 739.0829
## 20 31371.4613 -nan 0.1000 15.1374
## 40 26856.5886 -nan 0.1000 -470.7550
## 60 23997.1852 -nan 0.1000 15.4109
## 80 22148.8598 -nan 0.1000 -166.0397
## 100 20262.2854 -nan 0.1000 10.1045
## 120 19117.1424 -nan 0.1000 -120.2211
## 140 18043.4686 -nan 0.1000 -167.5913
## 150 17687.5431 -nan 0.1000 -156.1959
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 48207.6763 -nan 0.1000 3399.9230
## 2 45565.7518 -nan 0.1000 1774.9105
## 3 43225.0273 -nan 0.1000 1888.4744
## 4 41338.5027 -nan 0.1000 1524.8722
## 5 39702.5578 -nan 0.1000 849.5830
## 6 37829.7666 -nan 0.1000 856.1821
## 7 36576.8931 -nan 0.1000 980.9611
## 8 35303.1557 -nan 0.1000 347.1572
## 9 34273.0471 -nan 0.1000 833.8494
## 10 33208.5038 -nan 0.1000 681.6445
## 20 28287.5197 -nan 0.1000 2.2072
## 40 23908.0702 -nan 0.1000 -311.4130
## 60 20591.3780 -nan 0.1000 -79.1894
## 80 18095.3402 -nan 0.1000 -280.3663
## 100 16473.2902 -nan 0.1000 -174.8572
## 120 15156.7878 -nan 0.1000 -301.8304
## 140 13597.8447 -nan 0.1000 -18.9977
## 150 13120.4740 -nan 0.1000 -12.3918
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 51830.8773 -nan 0.1000 1805.7643
## 2 50179.4826 -nan 0.1000 1743.5426
## 3 48445.8826 -nan 0.1000 772.0854
## 4 47094.7543 -nan 0.1000 1349.9851
## 5 45884.6074 -nan 0.1000 801.6917
## 6 44450.8913 -nan 0.1000 934.7534
## 7 43431.3147 -nan 0.1000 334.5053
## 8 42173.5079 -nan 0.1000 839.5433
## 9 41221.8725 -nan 0.1000 670.2029
## 10 40396.3274 -nan 0.1000 750.4961
## 20 35950.9093 -nan 0.1000 -258.0982
## 40 32688.0820 -nan 0.1000 62.3145
## 60 31067.8602 -nan 0.1000 -70.5081
## 80 30067.4492 -nan 0.1000 48.0768
## 100 29032.0504 -nan 0.1000 -60.4241
## 120 28425.4374 -nan 0.1000 -235.4553
## 140 27544.9104 -nan 0.1000 -245.2624
## 150 27231.9356 -nan 0.1000 -84.1448
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 50631.5334 -nan 0.1000 3136.8134
## 2 47947.1471 -nan 0.1000 2766.3509
## 3 45297.8075 -nan 0.1000 1319.4048
## 4 43663.6449 -nan 0.1000 968.4453
## 5 42004.6954 -nan 0.1000 1402.9205
## 6 40228.5311 -nan 0.1000 1230.7467
## 7 39146.2099 -nan 0.1000 626.9079
## 8 38220.5020 -nan 0.1000 712.5858
## 9 37328.6846 -nan 0.1000 626.8824
## 10 36636.3856 -nan 0.1000 303.4536
## 20 31825.2242 -nan 0.1000 -336.6848
## 40 26891.8195 -nan 0.1000 -42.2311
## 60 24228.7427 -nan 0.1000 280.3432
## 80 22203.2760 -nan 0.1000 -82.9231
## 100 20541.5620 -nan 0.1000 -151.4118
## 120 19451.7714 -nan 0.1000 -136.2038
## 140 18384.2445 -nan 0.1000 -76.2965
## 150 18117.6406 -nan 0.1000 -133.0888
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 50377.0804 -nan 0.1000 2816.5664
## 2 46603.6247 -nan 0.1000 2325.3695
## 3 44128.7254 -nan 0.1000 1676.0059
## 4 41159.6245 -nan 0.1000 1556.7815
## 5 39604.3524 -nan 0.1000 1525.1722
## 6 38168.2757 -nan 0.1000 832.6823
## 7 37191.7778 -nan 0.1000 832.8172
## 8 36081.0959 -nan 0.1000 921.2319
## 9 35068.7790 -nan 0.1000 118.5416
## 10 34448.2212 -nan 0.1000 356.0382
## 20 28234.6485 -nan 0.1000 692.2787
## 40 23136.5929 -nan 0.1000 24.4731
## 60 19988.2068 -nan 0.1000 -107.7626
## 80 17712.0669 -nan 0.1000 -342.1278
## 100 15672.7356 -nan 0.1000 -163.6213
## 120 14293.3516 -nan 0.1000 18.5972
## 140 13248.2712 -nan 0.1000 -151.6262
## 150 12889.4167 -nan 0.1000 -128.1727
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 52214.3858 -nan 0.1000 1600.9756
## 2 50758.2696 -nan 0.1000 1489.6673
## 3 49054.2707 -nan 0.1000 1327.9872
## 4 47301.3685 -nan 0.1000 399.8762
## 5 45768.8592 -nan 0.1000 1493.4214
## 6 44803.9991 -nan 0.1000 640.4816
## 7 43416.7276 -nan 0.1000 979.7663
## 8 42273.2864 -nan 0.1000 812.1099
## 9 41199.6027 -nan 0.1000 817.1695
## 10 40503.4363 -nan 0.1000 610.6940
## 20 35883.9462 -nan 0.1000 12.4933
## 40 32670.8970 -nan 0.1000 -64.3296
## 60 31260.5558 -nan 0.1000 -155.4968
## 80 30466.2838 -nan 0.1000 26.1934
## 100 29598.0159 -nan 0.1000 -86.8621
## 120 28718.5199 -nan 0.1000 -24.5701
## 140 27830.5249 -nan 0.1000 -122.2310
## 150 27507.0052 -nan 0.1000 -66.8147
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 50980.7464 -nan 0.1000 2729.4021
## 2 47966.8431 -nan 0.1000 2385.4670
## 3 45573.2955 -nan 0.1000 1952.0186
## 4 43220.3331 -nan 0.1000 1580.5582
## 5 41316.0711 -nan 0.1000 1520.8372
## 6 40110.7788 -nan 0.1000 758.2270
## 7 38984.2587 -nan 0.1000 759.9578
## 8 37643.8809 -nan 0.1000 885.7060
## 9 36460.0558 -nan 0.1000 392.5879
## 10 35747.5470 -nan 0.1000 588.0000
## 20 31109.7799 -nan 0.1000 1.1121
## 40 26587.3364 -nan 0.1000 -188.9186
## 60 24433.8542 -nan 0.1000 -126.0805
## 80 22663.1829 -nan 0.1000 -324.6072
## 100 20799.8432 -nan 0.1000 -292.5516
## 120 19617.6967 -nan 0.1000 -97.9638
## 140 18541.7271 -nan 0.1000 -220.0425
## 150 18079.2632 -nan 0.1000 -120.0637
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 50567.2677 -nan 0.1000 3023.6364
## 2 47319.1588 -nan 0.1000 2752.2541
## 3 44516.0487 -nan 0.1000 2124.5583
## 4 41905.7002 -nan 0.1000 2060.9382
## 5 40179.1403 -nan 0.1000 1320.4154
## 6 38508.0668 -nan 0.1000 1288.6671
## 7 37192.0612 -nan 0.1000 983.6842
## 8 35956.5975 -nan 0.1000 1152.0748
## 9 35008.1840 -nan 0.1000 712.7755
## 10 33932.6291 -nan 0.1000 529.6448
## 20 29487.5137 -nan 0.1000 -39.4151
## 40 23306.9090 -nan 0.1000 -125.2123
## 60 20043.4544 -nan 0.1000 96.4604
## 80 17568.1606 -nan 0.1000 10.8940
## 100 15994.1306 -nan 0.1000 -137.7734
## 120 14658.3272 -nan 0.1000 -230.2337
## 140 13399.2697 -nan 0.1000 -116.7425
## 150 13042.2759 -nan 0.1000 -153.0592
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 52881.8877 -nan 0.1000 1935.0225
## 2 50680.5827 -nan 0.1000 1500.2538
## 3 48891.7757 -nan 0.1000 1301.7054
## 4 47271.9588 -nan 0.1000 1005.6993
## 5 46078.4382 -nan 0.1000 1167.7233
## 6 44925.5762 -nan 0.1000 933.9795
## 7 44169.0260 -nan 0.1000 414.3433
## 8 43275.4132 -nan 0.1000 775.0406
## 9 42500.9548 -nan 0.1000 531.5001
## 10 41575.8996 -nan 0.1000 575.8893
## 20 36562.6467 -nan 0.1000 162.4247
## 40 33053.7907 -nan 0.1000 86.8902
## 60 31770.8933 -nan 0.1000 -126.8804
## 80 30600.9333 -nan 0.1000 -262.2830
## 100 29786.9128 -nan 0.1000 -109.8036
## 120 28770.9526 -nan 0.1000 -128.7233
## 140 27977.2441 -nan 0.1000 -75.8393
## 150 27615.3638 -nan 0.1000 -93.0683
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 51945.3416 -nan 0.1000 2617.8573
## 2 49202.4023 -nan 0.1000 2311.8246
## 3 47081.7829 -nan 0.1000 845.9174
## 4 44618.7349 -nan 0.1000 2260.0250
## 5 42958.5527 -nan 0.1000 1590.3314
## 6 41228.2994 -nan 0.1000 1502.5759
## 7 40117.9901 -nan 0.1000 852.9022
## 8 38837.6802 -nan 0.1000 856.0611
## 9 37757.7404 -nan 0.1000 766.7507
## 10 36462.9150 -nan 0.1000 903.3834
## 20 31600.2785 -nan 0.1000 239.3625
## 40 27456.3945 -nan 0.1000 -219.2974
## 60 24743.5370 -nan 0.1000 -174.8742
## 80 22866.9126 -nan 0.1000 -233.4481
## 100 21181.6762 -nan 0.1000 -241.3507
## 120 20037.0629 -nan 0.1000 -149.7849
## 140 18687.5030 -nan 0.1000 -54.0579
## 150 18268.0262 -nan 0.1000 -89.2640
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 51184.7485 -nan 0.1000 2755.7692
## 2 48073.5169 -nan 0.1000 2796.8160
## 3 45597.9915 -nan 0.1000 1730.4158
## 4 43298.5591 -nan 0.1000 2129.3832
## 5 41250.3610 -nan 0.1000 1335.1225
## 6 39513.5446 -nan 0.1000 1248.0633
## 7 37845.9931 -nan 0.1000 307.0052
## 8 36804.6634 -nan 0.1000 938.8930
## 9 35755.1542 -nan 0.1000 365.6346
## 10 35030.1046 -nan 0.1000 329.6958
## 20 30317.2463 -nan 0.1000 37.7080
## 40 24363.4596 -nan 0.1000 -45.7571
## 60 20303.1326 -nan 0.1000 -142.1876
## 80 18382.4430 -nan 0.1000 -301.7427
## 100 16498.3040 -nan 0.1000 -273.2904
## 120 15164.8977 -nan 0.1000 -142.7262
## 140 13670.2717 -nan 0.1000 -101.2531
## 150 13219.4634 -nan 0.1000 -123.7589
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 48970.3523 -nan 0.1000 2699.7400
## 2 45935.1251 -nan 0.1000 1891.2065
## 3 43202.1596 -nan 0.1000 2145.8962
## 4 41403.1738 -nan 0.1000 1861.6645
## 5 39585.5690 -nan 0.1000 1241.6523
## 6 38050.0881 -nan 0.1000 768.8151
## 7 36930.1179 -nan 0.1000 215.5665
## 8 35853.4215 -nan 0.1000 872.2806
## 9 35110.9422 -nan 0.1000 73.1868
## 10 34049.9195 -nan 0.1000 440.8312
## 20 28281.5111 -nan 0.1000 -233.3057
## 40 22285.6492 -nan 0.1000 -57.1597
## 60 19437.9398 -nan 0.1000 -11.4153
## 80 17700.1227 -nan 0.1000 -78.2389
## 100 16117.7705 -nan 0.1000 -61.4669
## 120 14606.6936 -nan 0.1000 -109.2797
## 140 13710.3714 -nan 0.1000 -62.3653
## 150 13070.2473 -nan 0.1000 -92.4734
# Collect the cross-validation resampling distributions from all fitted
# caret models (gbm, rpart, ctree, rf) so they can be compared on a common
# set of folds, then print side-by-side RMSE / R-squared summaries.
results <- resamples(models)
summary(results)
##
## Call:
## summary.resamples(object = results)
##
## Models: gbm, rpart, ctree, rf
## Number of resamples: 10
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## gbm 127.3188 139.1512 157.1177 170.8398 204.6460 242.6639 0
## rpart 156.5146 178.5645 188.0079 203.0930 229.8374 280.6922 0
## ctree 133.6222 156.6227 185.8617 186.2049 205.1202 244.1956 0
## rf 118.7657 142.4115 149.5901 171.4107 206.9578 260.1863 0
##
## Rsquared
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## gbm 0.2532567 0.4275054 0.4645965 0.4551981 0.5239915 0.5571124 0
## rpart 0.0385494 0.1692423 0.2150594 0.2258874 0.2677210 0.4457296 0
## ctree 0.1168496 0.2926411 0.3938881 0.3661205 0.4316656 0.5766137 0
## rf 0.2164554 0.3738006 0.4712877 0.4517680 0.5416886 0.6109469 0
# Fix the RNG state so the ensemble's internal resampling is reproducible.
set.seed(101)
# Reuse the same trainControl as the base learners so the meta-model is
# evaluated on comparable folds (assumes `train_control` is defined earlier).
stackControl <- train_control
# Ensemble the predictions of `models` to form a new combined prediction based on glm
stack.glm <- caretStack(models, method="glm", trControl=stackControl)
print(stack.glm)
## A glm ensemble of 2 base models: gbm, rpart, ctree, rf
##
## Ensemble results:
## Generalized Linear Model
##
## 348 samples
## 4 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 1 times)
## Summary of sample sizes: 312, 313, 313, 312, 314, 312, ...
## Resampling results:
##
## RMSE Rsquared
## 170.8445 0.4519159
##
##
# make predictions
predictions<- predict(model,data5b)
predictions_loss <- predict(model_loss,data5b)
#cforest_Prediction <- predict(cforest_model_loss, newdata=test_loss, OOB=TRUE, type = "response")
predictions_loss_all <- predict(model_loss_all,data5b)
#--end ensemble
# append predictions
mydat<- cbind(data5b,predictions)
mydat_loss <- cbind(data5b,predictions_loss)
#cforest_mydat_loss <- cbind(test_loss,cforest_Prediction)
mydat_loss_all <- cbind(data5b,predictions_loss_all)
finaldat <- cbind.data.frame(data5a_statecountyear, mydat)
finaldat_loss <- cbind.data.frame(data5a_statecountyear, mydat_loss)
finaldat_loss_all <- cbind.data.frame(data5a_statecountyear, mydat_loss_all)
finaldat <- subset(finaldat, county != "Kootenai" & county != "Clearwater")
countyvector <- unique(finaldat$county)
county_rmse_r2 <- data.frame(matrix(ncol =2, nrow = 24))
county_rmse_r2$county <- countyvector
colnames(county_rmse_r2) <- c("rmse", "r2", "county")
# --- Per-county plots: historical vs. predicted percent-acreage drought claims ---
# For each study-area county, compute county-level R2 / RMSE / MAE on the
# predictions, record them in `county_rmse_r2`, and plot the 2001-2015 time
# series of observed vs. predicted values.
#
# Error-metric helpers, defined once instead of being redefined on every
# loop iteration. m = modeled (predicted), o = observed.
RMSE <- function(m, o) {
  sqrt(mean((m - o)^2))
}
MAE <- function(m, o) {
  mean(abs(m - o))
}
# Loop-invariant regression fit (previously recomputed inside the loop on
# every iteration; `form3` and `data5b` do not change across iterations).
fit1 <- lm(form3, data = data5b)
ii <- 0
for (i in countyvector) {
  ii <- ii + 1
  countyplot <- subset(finaldat, county == i)
  # R2 from total and residual sums of squares.
  avr_y_actual <- mean(countyplot$pct_acreage)
  ss_total <- sum((countyplot$pct_acreage - avr_y_actual)^2)
  # Regression sum of squares (computed for reference; not used below).
  ss_regression <- sum((countyplot$predictions - avr_y_actual)^2)
  ss_residuals <- sum((countyplot$pct_acreage - countyplot$predictions)^2)
  r2 <- round(1 - ss_residuals / ss_total, 2)
  rmse <- round(RMSE(countyplot$predictions, countyplot$pct_acreage), 2)
  mae <- round(MAE(countyplot$predictions, countyplot$pct_acreage), 2)
  county_rmse_r2[ii, 1] <- rmse
  county_rmse_r2[ii, 2] <- r2
  # Pad to the full 2001-2015 range so every county plots on the same axis;
  # years with no data are shown as zero.
  yearlist <- data.frame(year = 2001:2015)
  countyplot <- merge(countyplot, yearlist, by = "year", all = TRUE)
  countyplot$predictions[is.na(countyplot$predictions)] <- 0
  countyplot$pct_acreage[is.na(countyplot$pct_acreage)] <- 0
  # Historical (red) vs. predicted (blue) percent acreage, one panel per county.
  par(mar = c(5, 5.1, 2, 3), family = "serif", mgp = c(3.8, 1, 0), las = 0)
  plot(c(countyplot$pct_acreage) ~ c(countyplot$year), cex.lab = 1.5, cex.axis = 1.5, col = "red",
       las = 2, xaxt = "n", xlab = "Year", ylab = "Percent Acreage Drought Claims",
       main = paste(i, " County, ", countyplot$state[1], ": R2 = ", r2, " RMSE = ", rmse, sep = ""))
  lines(countyplot$pct_acreage ~ countyplot$year, col = "red")
  points(countyplot$predictions ~ countyplot$year, col = "blue")
  lines(countyplot$predictions ~ countyplot$year, col = "blue")
  axis(1, countyplot$year, 2001:2015, col.axis = "black", las = 2, cex.axis = 1.5)
  legend("topright", legend = c("historical", "predicted"),
         col = c("red", "blue"), lty = 1, cex = 1.5)
}
# --- Study-area totals: historical vs. predicted annual wheat/drought loss ---
# Rebuild the full (unfiltered) datasets from the prediction matrices.
# (A second, byte-identical rebuild that immediately followed the NRMSE
# calls was redundant and has been removed.)
finaldat <- cbind.data.frame(data5a_statecountyear, mydat)
finaldat_loss <- cbind.data.frame(data5a_statecountyear, mydat_loss)
finaldat_loss_all <- cbind.data.frame(data5a_statecountyear, mydat_loss_all)
# Normalized RMSE diagnostics (assumes `nrmse_func` is defined earlier).
NRMSE1 <- nrmse_func(finaldat$loss, finaldat$predictions)
NRMSE2 <- nrmse_func(finaldat_loss$loss, finaldat_loss$predictions_loss)
#loss historical vs predicted
# Back-transform from log10 dollars to dollars (assumes `loss` was modeled
# on a log10 scale upstream -- TODO confirm against the model-fitting code).
finaldat_loss$loss <- 10^finaldat_loss$loss
finaldat_loss$predictions_loss <- 10^finaldat_loss$predictions_loss
# Annual study-area totals of observed and predicted loss.
fd_loss <- aggregate(finaldat_loss$loss, by = list(finaldat_loss$year), FUN = "sum")
fd_pred <- aggregate(finaldat_loss$predictions_loss, by = list(finaldat_loss$year), FUN = "sum")
# R2 from total and residual sums of squares on the annual totals.
avr_y_actual <- mean(fd_loss$x)
ss_total <- sum((fd_loss$x - avr_y_actual)^2)
# Regression sum of squares (computed for reference; not used below).
ss_regression <- sum((fd_pred$x - avr_y_actual)^2)
ss_residuals <- sum((fd_loss$x - fd_pred$x)^2)
r2 <- round(1 - ss_residuals / ss_total, 2)
RMSE <- function(m, o) {
  sqrt(mean((m - o)^2))
}
rmse <- round(RMSE(fd_pred$x, fd_loss$x), 2)
#plot all counties combined, by year
par(mar = c(6, 5.1, 2, 3), family = "serif", mgp = c(3.5, .6, 0), las = 0)
plot(fd_loss$x ~ fd_loss$Group.1, col = "red", cex.lab = 1.5, cex.axis = 1.3, las = 2,
     yaxt = "n", xaxt = "n", xlab = "Year", ylab = "Annual Wheat/Drought Loss",
     main = paste("24 county iPNW study area: R2 = ", r2, " RMSE = ", rmse, sep = ""))
lines(fd_loss$x ~ fd_loss$Group.1, col = "red")
points(fd_pred$x ~ fd_pred$Group.1, col = "blue")
lines(fd_pred$x ~ fd_pred$Group.1, col = "blue")
axis(1, fd_loss$Group.1, 2001:2015, col.axis = "black", las = 2, cex.axis = 1.3)
# Label the y axis in millions of dollars. Bug fix: labels were previously
# taken from pretty(x / 1e6) while tick positions came from pretty(x) --
# those two calls are not guaranteed to produce matching breakpoints.
# Deriving labels from the same breakpoints guarantees they line up.
pts2 <- pretty(fd_loss$x)
axis(2, las = 1, cex.axis = 1.3, at = pts2, labels = paste0(pts2 / 1000000, "M"))
legend("topleft", legend = c("historical", "predicted"),
       col = c("red", "blue"), lty = 1, cex = 1.3)
# --- Setup for per-county loss plots ---
# Rebuild the datasets and back-transform losses from log10 dollars to
# dollars (assumes an upstream log10 transform -- TODO confirm).
finaldat <- cbind.data.frame(data5a_statecountyear, mydat)
finaldat_loss <- cbind.data.frame(data5a_statecountyear, mydat_loss)
finaldat_loss_all <- cbind.data.frame(data5a_statecountyear, mydat_loss_all)
finaldat_loss$loss <- 10^finaldat_loss$loss
finaldat_loss$predictions_loss <- 10^finaldat_loss$predictions_loss
# Drop the two counties excluded from the 24-county analysis.
finaldat_loss <- subset(finaldat_loss, county != "Kootenai" & county != "Clearwater")
countyvector <- unique(finaldat_loss$county)
# Results table for per-county RMSE / R2. Size it from the data rather than
# hard-coding 24 rows, so a change in the county set cannot cause a
# row-count mismatch.
county_rmse_r2 <- data.frame(matrix(ncol = 2, nrow = length(countyvector)))
county_rmse_r2$county <- countyvector
colnames(county_rmse_r2) <- c("rmse", "r2", "county")
# --- Per-county plots: historical vs. predicted wheat/drought insurance loss ---
# For each study-area county, compute county-level R2 / RMSE / MAE on the
# dollar-scale loss predictions, record them in `county_rmse_r2`, and plot
# the 2001-2015 time series of observed vs. predicted loss.
#
# Error-metric helpers, defined once instead of being redefined on every
# loop iteration. m = modeled (predicted), o = observed.
RMSE <- function(m, o) {
  sqrt(mean((m - o)^2))
}
MAE <- function(m, o) {
  mean(abs(m - o))
}
ii <- 0
for (i in countyvector) {
  ii <- ii + 1
  countyplot <- subset(finaldat_loss, county == i)
  # Capture the county's state BEFORE padding with missing years, so the
  # plot title cannot pick up an NA from a padded row.
  county_state <- countyplot$state[1]
  # Pad to the full 2001-2015 range; years with no data plot as zero loss.
  yearlist <- data.frame(year = 2001:2015)
  countyplot <- merge(countyplot, yearlist, by = "year", all = TRUE)
  countyplot$predictions_loss[is.na(countyplot$predictions_loss)] <- 0
  countyplot$loss[is.na(countyplot$loss)] <- 0
  # R2 from total and residual sums of squares (computed on the padded
  # series, matching the original analysis).
  avr_y_actual <- mean(countyplot$loss)
  ss_total <- sum((countyplot$loss - avr_y_actual)^2)
  # Regression sum of squares (computed for reference; not used below).
  ss_regression <- sum((countyplot$predictions_loss - avr_y_actual)^2)
  ss_residuals <- sum((countyplot$loss - countyplot$predictions_loss)^2)
  r2 <- round(1 - ss_residuals / ss_total, 2)
  rmse <- round(RMSE(countyplot$predictions_loss, countyplot$loss), 2)
  mae <- round(MAE(countyplot$predictions_loss, countyplot$loss), 2)
  county_rmse_r2[ii, 1] <- rmse
  county_rmse_r2[ii, 2] <- r2
  # Historical (red) vs. predicted (blue) loss, one panel per county.
  par(mar = c(6, 5.1, 2, 3), family = "serif", mgp = c(3.5, .6, 0), las = 0)
  # Bug fix: the title previously hard-coded "County, Oregon" for every
  # county, mislabeling the Washington and Idaho counties. Use the county's
  # own state, matching the percent-acreage plots above.
  plot(c(countyplot$loss) ~ c(countyplot$year), cex.lab = 1.5, cex.axis = 1.5, col = "red",
       las = 2, yaxt = "n", xaxt = "n", xlab = "Year", ylab = "Wheat/Drought Insurance Loss",
       main = paste(i, " County, ", county_state, ": R2 = ", r2, " RMSE = ", rmse, sep = ""))
  lines(countyplot$loss ~ countyplot$year, col = "red")
  points(countyplot$predictions_loss ~ countyplot$year, col = "blue")
  lines(countyplot$predictions_loss ~ countyplot$year, col = "blue")
  axis(1, countyplot$year, 2001:2015, col.axis = "black", las = 2, cex.axis = 1.5)
  # Label the y axis in millions of dollars. Bug fix: labels were previously
  # taken from pretty(x / 1e6) while tick positions came from pretty(x) --
  # those two calls are not guaranteed to produce matching breakpoints.
  pts2 <- pretty(countyplot$loss)
  axis(2, las = 1, cex.axis = 1.3, at = pts2, labels = paste0(pts2 / 1000000, "M"))
  legend("topright", legend = c("historical", "predicted"),
         col = c("red", "blue"), lty = 1, cex = 1.3)
}